## Create a DataFrame from a CSV file

In [184]:
import pandas as pd
# Load the weather observations from the CSV file in the working directory.
csv_path = 'weather_data.csv'
df = pd.read_csv(csv_path)
# Bare last expression: the notebook renders the frame as a table.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


## Create a DataFrame from a Python dictionary

In [185]:
# Build a small DataFrame straight from a dict: each key becomes a column
# name and each list supplies that column's values.
dictionary = dict(
    day=['1/1/2017', '1/2/2017'],
    temperature=[32, 35],
)

df2 = pd.DataFrame(data=dictionary)
df2

Unnamed: 0,day,temperature
0,1/1/2017,32
1,1/2/2017,35


In [186]:
# shape is a (number of rows, number of columns) tuple.
df.shape

(6, 4)

In [187]:
# Split the shape tuple into its row count and column count,
# then display the number of rows.
shape = df.shape
rows = shape[0]
columns = shape[1]
rows

6

## Print only the first few rows

In [188]:
# head() shows the first five rows by default; n is spelled out for clarity.
df.head(n=5)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [189]:
# Show only the first 2 rows.
df.head(n=2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny


In [190]:
# Show only the last 4 rows.
df.tail(n=4)

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [191]:
# Rows at positions 2, 3 and 4 (the end of the slice is exclusive).
# .iloc makes it explicit that the slice is positional; a bare df[2:5]
# selects the same rows but reads like a column lookup.
df.iloc[2:5]

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [192]:
# Column labels of the DataFrame, returned as a pandas Index.
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [193]:
# Select a single column by its label; the result is a Series.
# Bracket access matches how the other cells select columns.
df['day']

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [194]:
# What type is a single column? Pull the column out first, then inspect it.
event_column = df['event']
type(event_column)

pandas.core.series.Series

## Print only a few selected columns

In [195]:
# Passing a list of labels returns a DataFrame containing just those columns,
# in the order listed.
selected_columns = ['event', 'day']
df[selected_columns]

Unnamed: 0,event,day
0,Rain,1/1/2017
1,Sunny,1/2/2017
2,Snow,1/3/2017
3,Snow,1/4/2017
4,Rain,1/5/2017
5,Sunny,1/6/2017


In [196]:
# Highest value in the temperature column.
df['temperature'].max()

35

In [197]:
# Lowest value in the temperature column.
df['temperature'].min()

24

In [198]:
# Arithmetic mean of the temperature column.
df['temperature'].mean()

30.333333333333332

In [199]:
# Sample standard deviation of the temperature column (pandas uses ddof=1 by default).
df['temperature'].std()

3.8297084310253524

## Print statistics for the entire dataset in one go

In [200]:
# Summary statistics for the numeric columns (temperature and windspeed here).
# The 25%, 50% and 75% rows are percentiles.
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


### Show all rows in the dataset where the temperature is greater than or equal to 32

In [201]:
# Build a boolean mask (one True/False per row), then keep the rows where it is True.
at_least_32 = df['temperature'] >= 32
df[at_least_32]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
4,1/5/2017,32,4,Rain


In [202]:
# Rows whose temperature equals the maximum temperature in the dataset.
is_hottest = df['temperature'] == df['temperature'].max()
df[is_hottest]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [203]:
# Day and temperature for the row(s) where the temperature is at its maximum.
# A single .loc[rows, columns] call replaces the two-step df[cols][mask]
# chained indexing and does not build an intermediate column-subset frame.
df.loc[df['temperature'] == df['temperature'].max(), ['day', 'temperature']]

Unnamed: 0,day,temperature
1,1/2/2017,35


In [204]:
# The default index: a RangeIndex of consecutive integers starting at 0.
df.index

RangeIndex(start=0, stop=6, step=1)

### Change index to day

In [206]:
# set_index returns a new DataFrame that uses the 'day' column as its row
# index. Rebinding df avoids inplace=True, which would mutate the frame
# and return None.
df = df.set_index('day')
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [207]:
# Label-based lookup: with 'day' as the index, .loc selects the row for that
# date and returns it as a Series.
df.loc['1/3/2017']

temperature      28
windspeed         2
event          Snow
Name: 1/3/2017, dtype: object

In [209]:
# reset_index moves 'day' back into a regular column and restores the default
# integer index. The returned frame is rebound instead of using inplace=True.
df = df.reset_index()
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [210]:
# Index the rows by weather event. Event labels repeat, so the resulting index
# is not unique. The new frame is rebound rather than mutated with inplace=True.
df = df.set_index('event')
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Rain,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [212]:
# The 'event' index is not unique, so .loc returns every row labelled 'Snow'
# as a DataFrame rather than a single Series.
df.loc['Snow']

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
