In [69]:
# What is a DataFrame?
'''
The DataFrame is the main object in pandas. It represents data as rows and columns.
A DataFrame is a data structure that organizes data into a 2-dimensional table of rows and columns,
much like a spreadsheet. DataFrames are one of the most common data structures used in modern data analytics
because they are a flexible and intuitive way of storing and working with data.
'''
# The string below is the last expression in the cell, so the notebook echoes it as the cell output.
'''
Topics Covered 
1. Loading Dataset from a Dictionary
2. Dataframe shape
3. Dataframe head - tail
4. Printing using indexes
5. Accessing Rows
6. Accessing Columns
7. Operations - min, max, mean, describe
8. Types
9. Conditional operation
10. Changing the index
11. Use of loc[]
12. Reset the index
'''


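'\nTopics Covered \n1. Loading Dataset from a Dictionary\n2. Dataframe shape\n3. Dataframe head - tail\n4. Printing using indexes\n5. Accessing Rows\n6. Accessing Columns\n7. Operations - min, max, mean, describe\n8. Types\n9. Conditional operation\n10. Changing the index\n11. Use of loc[]\n12. Reset the index\n'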

In [2]:
import pandas as pd
# Load the weather observations from a CSV file (relative path) into a DataFrame,
# then end the cell with the frame itself so the notebook renders it as a table.
df = pd.read_csv(filepath_or_buffer='data/weather_data.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [3]:
# Build a DataFrame from a dictionary: each key becomes a column label and each
# list holds that column's values, one entry per row.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
)
df = pd.DataFrame(data=weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [8]:
# Get the dimensions of the DataFrame. `shape` is a (rows, columns) tuple, so it
# can be unpacked straight into two names.
row, col = df.shape
# Only the last expression of a cell is displayed, so the shape goes last;
# as a bare expression above the assignment its value was silently discarded.
df.shape

In [9]:
# Number of rows, as unpacked from df.shape.
row

6

In [12]:
# Show only the first few rows: head(n) returns the first n rows (here n = 3).
df.head(3)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


In [14]:
# Show only the last few rows: tail(n) returns the last n rows (here n = 3).
df.tail(3)

Unnamed: 0,day,temperature,windspeed,event
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [17]:
# Select a range of rows by position with df[from_row:to_row].
# Note: the end of the slice is exclusive, so df[1:3] returns rows 1 and 2 only.
df[1:3]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


In [19]:
# To display every row, use either df on its own or the full slice df[:].
df[:]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [21]:
# List the column labels; `columns` is an Index holding the column names.
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [22]:
# Select a single column by its label; the attribute form df.day selects the same column.
df['day'] # or df.day

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [23]:
# Check the type of a single selected column: it is a pandas Series, not a DataFrame.
type(df['day'])

pandas.core.series.Series

In [30]:
# Select several columns at once by passing a list of column labels;
# the result is a DataFrame containing just those columns.
selected_columns = ['day', 'temperature']
df[selected_columns]

Unnamed: 0,day,temperature
0,1/1/2017,32
1,1/2/2017,35
2,1/3/2017,28
3,1/4/2017,24
4,1/5/2017,32
5,1/6/2017,31


In [32]:
# Maximum temperature. Bracket selection picks the same column as the
# attribute form df.temperature.
df['temperature'].max()

35

In [33]:
# Average wind speed: mean() of the windspeed column.
df['windspeed'].mean()

4.666666666666667

In [34]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; here that is temperature and windspeed.
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [39]:
# Conditional operation: build a boolean mask (True where temperature is above 30)
# and use it to keep only the matching rows.
is_above_30 = df['temperature'] > 30
df[is_above_30]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [40]:
# Rows whose temperature equals the column maximum.
hottest = df['temperature'].max()
df[df['temperature'] == hottest]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [44]:
# Conditional selection with custom columns.
# A single .loc[row_mask, column_labels] call picks the rows and the columns in
# one step, instead of chaining two [] lookups (select columns, then filter the
# intermediate frame).
df.loc[df['temperature'] == df['temperature'].max(), ['event', 'temperature']]

Unnamed: 0,event,temperature
1,Sunny,35


In [50]:
# Changing the index: use the 'day' column as the row labels.
# set_index returns a new DataFrame; rebinding df (rather than inplace=True,
# which returns None and displays nothing) lets the cell end by showing the
# re-indexed frame.
df = df.set_index('day')
df


Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [54]:
# loc[] looks up rows by index label. With 'day' as the index, a single
# matching label returns that row as a Series.
df.loc['1/5/2017']

temperature      32
windspeed         4
event          Rain
Name: 1/5/2017, dtype: object

In [55]:
 df.loc['1/5/2017']['event']

'Rain'

In [58]:
# Reset the index: move the current index back into an ordinary column and
# restore the default integer index. reset_index returns a new DataFrame,
# so rebind df instead of mutating it with inplace=True.
df = df.reset_index()

In [59]:
# Display the frame after the reset: 'day' is a regular column again and the
# rows are labelled 0..5.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [3]:
# Use the 'event' column as the index. set_index returns a new DataFrame,
# so rebind df instead of mutating it with inplace=True.
df = df.set_index('event')


In [65]:
# Display the frame indexed by 'event'; note that index labels may repeat.
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Rain,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [68]:
# The 'event' index contains repeated labels, so loc['Rain'] returns every
# matching row as a DataFrame rather than a single Series.
df.loc['Rain']

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Rain,1/5/2017,32,4
