# Pandas Introduction


In [10]:
import pandas as pd

## Dataframe 


The DataFrame is the main object in pandas. It is used to represent data with rows and columns.
Every value within a column has the same data type. Different columns can contain different data types.

Each column of a DataFrame is a pandas `Series`.


In [21]:
# read_csv() - load the CSV file 'weather.csv' (path is relative to the
# current working directory) into a dataframe, then display it
df = pd.read_csv('weather.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,rain
1,01/02/17,35,7,sunny
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow
4,01/05/17,32,4,rain
5,01/06/17,32,2,sunny


## Some Basic Methods

In [24]:
# head() - returns the first rows of the dataframe (5 when no argument is given)
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,rain
1,01/02/17,35,7,sunny
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow
4,01/05/17,32,4,rain


In [31]:
# head(n) - returns the first n rows (here n = 2)
df.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,rain
1,01/02/17,35,7,sunny


In [33]:
# tail(n) - returns the last n rows (here n = 4)
df.tail(4)

Unnamed: 0,day,temperature,windspeed,event
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow
4,01/05/17,32,4,rain
5,01/06/17,32,2,sunny


In [26]:
# shape - the dimensions of the dataframe as a tuple: (number of rows, number of columns)
df.shape

(6, 4)

In [30]:
# unpack the shape tuple into separate row and column counts
rows, columns = df.shape
rows, columns

(6, 4)

In [36]:
df.iloc[2:5]      # rows at positions 2, 3 and 4 (the end position 5 is excluded)

Unnamed: 0,day,temperature,windspeed,event
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow
4,01/05/17,32,4,rain


In [38]:
# columns - the column labels of the dataframe
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [39]:
# df.column_name - returns a single column via attribute access
# (only usable when the column name is a valid Python identifier that does not
# clash with an existing dataframe attribute or method; otherwise use df['column_name'])
df.event

0     rain
1    sunny
2     snow
3     snow
4     rain
5    sunny
Name: event, dtype: object

In [41]:
# df['column_name'] - also returns a single column; works for any column name
df['day']

0    01/01/17
1    01/02/17
2    01/03/17
3    01/04/17
4    01/05/17
5    01/06/17
Name: day, dtype: object

In [43]:
# a single column selected from the dataframe is a pandas Series
type(df['event'])

pandas.core.series.Series

In [44]:
# select several columns by passing a list of names;
# the columns appear in the order given in the list
df[['event','day']]

Unnamed: 0,event,day
0,rain,01/01/17
1,sunny,01/02/17
2,snow,01/03/17
3,snow,01/04/17
4,rain,01/05/17
5,sunny,01/06/17


## Basic Operations on Dataframes

In [45]:
df

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,rain
1,01/02/17,35,7,sunny
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow
4,01/05/17,32,4,rain
5,01/06/17,32,2,sunny


### 1. max()

In [46]:
# highest value in the temperature column
df['temperature'].max()

35

### 2. min()

In [47]:
# lowest value in the temperature column
df['temperature'].min()

24

### 3. mean()

In [48]:
# arithmetic mean of the windspeed column
df['windspeed'].mean()

4.666666666666667

### 4. std() - Standard Deviation 

In [50]:
# sample standard deviation of the temperature column
# (divides by n - 1, which is what produces the value shown below)
df['temperature'].std()

3.8858718455450894

### 5. describe() - summary statistics for the numeric columns

In [51]:
# count, mean, std, min, quartiles and max for each numeric column
# (the text columns 'day' and 'event' are left out of the result below)
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.5,4.666667
std,3.885872,2.33809
min,24.0,2.0
25%,29.0,2.5
50%,32.0,5.0
75%,32.0,6.75
max,35.0,7.0


------------------------------------------------------

In [66]:
df

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,rain
1,01/02/17,35,7,sunny
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow
4,01/05/17,32,4,rain
5,01/06/17,32,2,sunny


In [57]:
# find the row(s) where temperature < 32:
# df.temperature < 32 builds a boolean mask, and df[mask] keeps the rows where it is True
df[df.temperature<32]

Unnamed: 0,day,temperature,windspeed,event
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow


In [58]:
# find the row(s) where temperature equals the maximum
# (if several rows share the maximum, all of them are returned)
df[df.temperature == df.temperature.max()]

Unnamed: 0,day,temperature,windspeed,event
1,01/02/17,35,7,sunny


In [59]:
# find the row(s) where temperature equals the minimum
# (if several rows share the minimum, all of them are returned)
df[df.temperature == df.temperature.min()]

Unnamed: 0,day,temperature,windspeed,event
3,01/04/17,24,7,snow


In [64]:
# show the day and temperature for the days when temperature was greater than 28;
# a single .loc[row_mask, columns] call selects rows and columns in one step
# instead of chaining two [] lookups
df.loc[df.temperature > 28, ['day', 'temperature']]

Unnamed: 0,day,temperature
0,01/01/17,32
1,01/02/17,35
4,01/05/17,32
5,01/06/17,32


------------------------------------------------------------------------------------------

In [67]:
df

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,rain
1,01/02/17,35,7,sunny
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow
4,01/05/17,32,4,rain
5,01/06/17,32,2,sunny


In [68]:
# index - the row labels of the dataframe; here it is the default integer RangeIndex
df.index

RangeIndex(start=0, stop=6, step=1)

### Change Index

In [None]:
# set_index() returns a new dataframe by default and leaves df untouched;
# inplace=True re-indexes df itself so that 'day' becomes its index
df.set_index('day', inplace=True)

In [82]:
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
01/01/17,32,6,rain
01/02/17,35,7,sunny
01/03/17,28,2,snow
01/04/17,24,7,snow
01/05/17,32,4,rain
01/06/17,32,2,sunny


In [87]:
# reset_index() - move the current index back into a regular column and
# restore the default integer index; inplace=True modifies df itself
df.reset_index(inplace=True)

In [88]:
df

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,rain
1,01/02/17,35,7,sunny
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow
4,01/05/17,32,4,rain
5,01/06/17,32,2,sunny


In [90]:
# loc[label] - select a row by its index label; the row comes back as a Series
# whose entries are that row's column values
df.loc[1]

day            01/02/17
temperature          35
windspeed             7
event             sunny
Name: 1, dtype: object

In [93]:
# use the 'day' column as the index; inplace=True modifies df itself
# instead of returning a new dataframe
df.set_index('day', inplace=True)

In [94]:
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
01/01/17,32,6,rain
01/02/17,35,7,sunny
01/03/17,28,2,snow
01/04/17,24,7,snow
01/05/17,32,4,rain
01/06/17,32,2,sunny


In [96]:
# move the current index back into a regular column and restore the default integer index
df.reset_index(inplace=True)

In [97]:
df

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,rain
1,01/02/17,35,7,sunny
2,01/03/17,28,2,snow
3,01/04/17,24,7,snow
4,01/05/17,32,4,rain
5,01/06/17,32,2,sunny


In [98]:
# use the 'event' column as the index; index labels do not have to be unique
# ('rain', 'sunny' and 'snow' each label two rows in the output below)
df.set_index('event',inplace=True)

In [99]:
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rain,01/01/17,32,6
sunny,01/02/17,35,7
snow,01/03/17,28,2
snow,01/04/17,24,7
rain,01/05/17,32,4
sunny,01/06/17,32,2


In [100]:
# loc[label] with a label that occurs more than once returns every matching row
df.loc['snow']

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
snow,01/03/17,28,2
snow,01/04/17,24,7
