In [None]:
# What is a DataFrame?
'''
The DataFrame is the main object in pandas. It is used to represent data with rows and columns.
A DataFrame is a data structure that organizes data into a 2-dimensional table of rows and columns, 
much like a spreadsheet. DataFrames are one of the most common data structures used in modern data analytics 
because they are a flexible and intuitive way of storing and working with data.
'''

In [1]:
import pandas as pd
# load the weather data from a CSV file into a DataFrame
# (the path is relative to the current working directory)
df = pd.read_csv('data/weather_data.csv')
# a bare name on the last line of a cell displays its value
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [3]:
# Build the DataFrame in memory instead of reading a file:
# each keyword below becomes a column name, and its list holds that column's values.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
)
df = pd.DataFrame(data=weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [8]:
# get the dimensions of the dataframe as a (rows, columns) tuple and unpack them
# note - only the last expression of a cell is displayed, so a bare `df.shape`
# placed above an assignment would show nothing; display `row` / `col` separately
row, col = df.shape

In [9]:
# number of rows, as unpacked from df.shape in the previous cell
row

6

In [12]:
# show only the first few rows: head(n) returns the first n rows (here n=3)
df.head(3)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


In [14]:
# show only the last few rows: tail(n) returns the last n rows (here n=3)
df.tail(3)

Unnamed: 0,day,temperature,windspeed,event
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [17]:
# select a range of rows with a slice: df[from_row:to_row]
# note - from_row is included but to_row is excluded, so df[1:3] returns rows 1 and 2
df[1:3]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


In [19]:
# to show every row, use either df on its own or the full slice df[:]
df[:]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [21]:
# list the column labels of the dataframe
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [22]:
# select a single column by name; df.day is an equivalent shorthand for this column
df['day']

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [23]:
# check the type of a single selected column: it is a pandas Series
type(df['day'])

pandas.core.series.Series

In [30]:
# select several columns at once by passing a list of column names
# (note the double brackets: the outer pair indexes, the inner pair is the list)
df[['day','temperature']]

Unnamed: 0,day,temperature
0,1/1/2017,32
1,1/2/2017,35
2,1/3/2017,28
3,1/4/2017,24
4,1/5/2017,32
5,1/6/2017,31


In [32]:
# maximum temperature
# bracket access is used (as in the other cells) because attribute access
# breaks when a column name collides with a DataFrame attribute or method
df['temperature'].max()

35

In [33]:
# average wind speed: mean of the 'windspeed' column
df['windspeed'].mean()

4.666666666666667

In [34]:
# summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [39]:
# conditional selection: keep only the rows where the boolean mask is True
# (here, days with a temperature above 30)
df.loc[df['temperature'] > 30]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [40]:
# rows whose temperature equals the column maximum (every tied row is kept)
df.loc[df['temperature'].eq(df['temperature'].max())]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
