In [1]:
import pandas as pd

### A pandas DataFrame can be created from a dictionary

In [2]:
# Six days of sample weather observations, stored column-wise:
# every key names a column and every list holds that column's values.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
)

In [3]:
# Build a DataFrame from the dictionary: each key becomes a column and the
# rows get a default integer index (0..5).
df = pd.DataFrame(weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


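### Selecting and ordering columns with `columns=` (names missing from the dictionary, such as `place`, become all-NaN columns)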

In [4]:
# `columns=` chooses which dictionary keys to use and in what order.
# 'place' is not a key of weather_data, so that column is created empty (all NaN).
df = pd.DataFrame(weather_data,columns=['place','day','temperature','windspeed','event'])
df

Unnamed: 0,place,day,temperature,windspeed,event
0,,1/1/2017,32,6,Rain
1,,1/2/2017,35,7,Sunny
2,,1/3/2017,28,2,Snow
3,,1/4/2017,24,7,Snow
4,,1/5/2017,32,4,Rain
5,,1/6/2017,31,2,Sunny


In [5]:
# (number of rows, number of columns)
df.shape

(6, 5)

In [6]:
# Data type stored in each column
df.dtypes

place          object
day            object
temperature     int64
windspeed       int64
event          object
dtype: object

In [7]:
# Column labels, in order
df.columns

Index(['place', 'day', 'temperature', 'windspeed', 'event'], dtype='object')

In [8]:
# First rows of the frame (five when no count is given)
df.head()

Unnamed: 0,place,day,temperature,windspeed,event
0,,1/1/2017,32,6,Rain
1,,1/2/2017,35,7,Sunny
2,,1/3/2017,28,2,Snow
3,,1/4/2017,24,7,Snow
4,,1/5/2017,32,4,Rain


In [9]:
# First three rows
df.head(3)

Unnamed: 0,place,day,temperature,windspeed,event
0,,1/1/2017,32,6,Rain
1,,1/2/2017,35,7,Sunny
2,,1/3/2017,28,2,Snow


In [10]:
# Last two rows
df.tail(2)

Unnamed: 0,place,day,temperature,windspeed,event
4,,1/5/2017,32,4,Rain
5,,1/6/2017,31,2,Sunny


In [11]:
# Row slice by position: start is inclusive, stop is exclusive -> rows 2 and 3
df[2:4]

Unnamed: 0,place,day,temperature,windspeed,event
2,,1/3/2017,28,2,Snow
3,,1/4/2017,24,7,Snow


In [12]:
# A full slice selects every row
df[:]

Unnamed: 0,place,day,temperature,windspeed,event
0,,1/1/2017,32,6,Rain
1,,1/2/2017,35,7,Sunny
2,,1/3/2017,28,2,Snow
3,,1/4/2017,24,7,Snow
4,,1/5/2017,32,4,Rain
5,,1/6/2017,31,2,Sunny


In [13]:
# Omitting the start slices from the first row -> rows 0 and 1
df[:2]

Unnamed: 0,place,day,temperature,windspeed,event
0,,1/1/2017,32,6,Rain
1,,1/2/2017,35,7,Sunny


In [14]:
# Slices can be chained: [2:5] keeps rows 2-4, then [1:4] is applied by position
# to that result (not by the original row labels), leaving rows 3 and 4.
df[2:5][1:4]

Unnamed: 0,place,day,temperature,windspeed,event
3,,1/4/2017,24,7,Snow
4,,1/5/2017,32,4,Rain


In [15]:
# Attribute access returns a single column as a Series
df.day

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [16]:
# A list of column names (even a one-item list) returns a DataFrame
df[['day']]

Unnamed: 0,day
0,1/1/2017
1,1/2/2017
2,1/3/2017
3,1/4/2017
4,1/5/2017
5,1/6/2017


In [17]:
# Select several columns at once, in the order listed
df[['day', 'event']]

Unnamed: 0,day,event
0,1/1/2017,Rain
1,1/2/2017,Sunny
2,1/3/2017,Snow
3,1/4/2017,Snow
4,1/5/2017,Rain
5,1/6/2017,Sunny


In [18]:
# The whole table is a DataFrame
type(df)

pandas.core.frame.DataFrame

In [19]:
# A single column accessed as an attribute is a Series
type(df.day)

pandas.core.series.Series

In [20]:
# Double brackets (a list of names) keep the result a DataFrame, even for one column
type(df[['day']])

pandas.core.frame.DataFrame

In [21]:
# Several columns selected with a list are also a DataFrame
type(df[['day', 'event']])

pandas.core.frame.DataFrame

In [22]:
# Single brackets with one column name give a Series
type(df['day'])

pandas.core.series.Series

### Note: `df[['col']]` (a list of column names) gives a DataFrame, while `df['col']` (a single column name) gives a Series

### Some commonly used functions

In [23]:
# Highest temperature in the column. Left as the cell's last expression so the
# notebook displays it, like the mean/median/std cells that follow.
df['temperature'].max()

35


In [24]:
# Arithmetic mean of the column
df['temperature'].mean()

30.333333333333332

In [25]:
# Median (middle value) of the column
df['temperature'].median()

31.5

In [26]:
# Sample standard deviation of the column (divides by n - 1)
df['temperature'].std()

3.8297084310253524

### Better option: `describe()` reports the summary statistics in one call

In [27]:
# count, mean, std, min, quartiles and max in a single call
df['temperature'].describe()

count     6.000000
mean     30.333333
std       3.829708
min      24.000000
25%      28.750000
50%      31.500000
75%      32.000000
max      35.000000
Name: temperature, dtype: float64

### `mean`, `median` and `mode` skip NaN values by default (`skipna=True` / `dropna=True`); `describe` likewise excludes NaN values from its statistics

In [28]:
# Element-wise comparison yields a boolean Series (True where temperature > 31)
df['temperature']>31

0     True
1     True
2    False
3    False
4     True
5    False
Name: temperature, dtype: bool

In [29]:
# Use the boolean Series as a mask to keep only the matching rows
df[df['temperature']>31]

Unnamed: 0,place,day,temperature,windspeed,event
0,,1/1/2017,32,6,Rain
1,,1/2/2017,35,7,Sunny
4,,1/5/2017,32,4,Rain


In [30]:
# Row(s) whose temperature equals the column maximum
df[df['temperature'] == df['temperature'].max()]

Unnamed: 0,place,day,temperature,windspeed,event
1,,1/2/2017,35,7,Sunny


In [31]:
# Same filter, then keep only the 'day' and 'temperature' columns
df[df['temperature'] == df['temperature'].max()][['day', 'temperature']]

Unnamed: 0,day,temperature
1,1/2/2017,35


### setting index

In [32]:
# Row labels; here still the default integer RangeIndex
df.index

RangeIndex(start=0, stop=6, step=1)

In [33]:
# Use the 'day' column as the row index. inplace=True modifies the original
# DataFrame rather than returning a new one.
df.set_index('day', inplace=True)

In [34]:
# 'day' is now the row index rather than a regular column
df

Unnamed: 0_level_0,place,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/1/2017,,32,6,Rain
1/2/2017,,35,7,Sunny
1/3/2017,,28,2,Snow
1/4/2017,,24,7,Snow
1/5/2017,,32,4,Rain
1/6/2017,,31,2,Sunny


### With `day` as the index, rows can be looked up by label with `.loc` (and by position with `.iloc`)

In [35]:
# Label-based lookup: the row whose index label is '1/1/2017'
df.loc['1/1/2017']

place           NaN
temperature      32
windspeed         6
event          Rain
Name: 1/1/2017, dtype: object

In [36]:
# Position-based lookup: the first row, whatever its label
df.iloc[0]

place           NaN
temperature      32
windspeed         6
event          Rain
Name: 1/1/2017, dtype: object

### Resetting the index (moves `day` back to a regular column)

In [37]:
# Restore the default integer index; 'day' becomes a regular column again.
df.reset_index(inplace=True)
df

Unnamed: 0,day,place,temperature,windspeed,event
0,1/1/2017,,32,6,Rain
1,1/2/2017,,35,7,Sunny
2,1/3/2017,,28,2,Snow
3,1/4/2017,,24,7,Snow
4,1/5/2017,,32,4,Rain
5,1/6/2017,,31,2,Sunny


### Finding correlations

In [38]:
# Pairwise correlation between the numeric columns: how strongly two columns
# move together. The non-numeric columns ('day', 'place', 'event') are dropped
# explicitly, because pandas 2.0+ no longer skips them silently and raises when
# it meets string data.
df.select_dtypes(include='number').corr()

Unnamed: 0,temperature,windspeed
temperature,1.0,0.037226
windspeed,0.037226,1.0


In [39]:
# Correlation of every numeric column with temperature, strongest first.
# Numeric columns are selected explicitly so string columns cannot make
# corr() raise on pandas 2.0+.
df.select_dtypes(include='number').corr()['temperature'].sort_values(ascending=False)

temperature    1.000000
windspeed      0.037226
Name: temperature, dtype: float64