# Pandas Tutorial

### Note: The DataFrame is the main object in pandas. It is used to represent data with rows and columns.

#### Note: The process of cleaning messy data is called Data Munging or Data Wrangling.

In [127]:
import pandas as pd
# Load the weather data from a CSV file (relative path) into a DataFrame.
df = pd.read_csv('weather_data.csv')
# A bare expression on the last line of a cell is rendered as the cell's output.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2020,32,6,Rain
1,1/2/2020,35,7,Sunny
2,1/3/2020,28,2,Snow
3,1/4/2020,24,7,Snow
4,1/5/2020,32,4,Rain
5,1/6/2020,32,2,Sunny


### 1. Working with Rows and Columns in Data Frame

In [128]:
# Number of rows and columns in the Data Frame, counted from its two axes.
rows = len(df.index)
cols = len(df.columns)
print(f"No of Rows in Data Frame is {rows}")
print(f"No of Col in Data Frame is {cols}")

No of Rows in Data Frame is 6
No of Col in Data Frame is 4


In [129]:
# Show the first 2 rows of the Data Frame
df.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2020,32,6,Rain
1,1/2/2020,35,7,Sunny


In [130]:
# Show the last 2 rows of the Data Frame
df.tail(2)

Unnamed: 0,day,temperature,windspeed,event
4,1/5/2020,32,4,Rain
5,1/6/2020,32,2,Sunny


In [131]:
# Row slicing with [start:stop:step]: from position 2 up to (not including) 5, taking every 2nd row
df[2:5:2]

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2020,28,2,Snow
4,1/5/2020,32,4,Rain


In [132]:
# List the column labels of the Data Frame
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [133]:
# Column selection followed by row slicing: keep three columns, then take the rows at positions 2 through 4
df[['day','temperature','event']][2:5]

Unnamed: 0,day,temperature,event
2,1/3/2020,28,Snow
3,1/4/2020,24,Snow
4,1/5/2020,32,Rain


### 2. Some Data Frame Statistical Functions

In [134]:
# Summary statistics for the temperature column; the column is looked up once and reused.
temperature = df['temperature']
print(f"The Max Tempreature is {temperature.max()}")
print(f"The Min Tempreature is {temperature.min()}")
print(f"The Mean Tempreature is {temperature.mean()}")
# NOTE(review): the max is printed a second time here; kept so the printed output is unchanged.
print(f"The Max Tempreature is {temperature.max()}")
print(f"The Standard Deviation in Tempreature is {temperature.std()}")

The Max Tempreature is 35
The Min Tempreature is 24
The Mean Tempreature is 30.5
The Max Tempreature is 35
The Standard Deviation in Tempreature is 3.8858718455450894


In [135]:
# Summary statistics (count, mean, std, min, quartiles, max) for the temperature column in a single call
df['temperature'].describe()

count     6.000000
mean     30.500000
std       3.885872
min      24.000000
25%      29.000000
50%      32.000000
75%      32.000000
max      35.000000
Name: temperature, dtype: float64

### 3. Conditional Operation in Data Frame

In [114]:
# Return the dates on which the event was Rain.
# .loc takes the row filter and the column label in a single indexing step,
# rather than chaining two separate [] lookups.
df.loc[df['event'] == 'Rain', 'day']

0    1/1/2020
4    1/5/2020
Name: day, dtype: object

In [115]:
# Return all rows where Temperature is 32 or more (>= includes 32 itself)
df[df['temperature'] >= 32]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2020,32,6,Rain
1,1/2/2020,35,7,Sunny
4,1/5/2020,32,4,Rain
5,1/6/2020,32,2,Sunny


In [116]:
# Day and temperature of the row(s) where windspeed equals its maximum value.
# .loc applies the row filter and the column selection in one indexing step.
df.loc[df['windspeed'] == df['windspeed'].max(), ['day','temperature']]

Unnamed: 0,day,temperature
1,1/2/2020,35
3,1/4/2020,24


### 4. Working with index in Data Frame

In [117]:
# Use the 'day' column as the row index so that rows can be looked up by date.
# The returned frame is assigned back to df instead of mutating it with inplace=True.
df = df.set_index('day')
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2020,32,6,Rain
1/2/2020,35,7,Sunny
1/3/2020,28,2,Snow
1/4/2020,24,7,Snow
1/5/2020,32,4,Rain
1/6/2020,32,2,Sunny


In [118]:
# Look up the row whose index label (day) is '1/6/2020'.
df.loc['1/6/2020']

temperature       32
windspeed          2
event          Sunny
Name: 1/6/2020, dtype: object

In [126]:
# Move 'day' back from the index into a regular column and restore the default integer index.
# The result is only displayed here; it is not assigned back to df.
df.reset_index()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2020,32,6,Rain
1,1/2/2020,35,7,Sunny
2,1/3/2020,28,2,Snow
3,1/4/2020,24,7,Snow
4,1/5/2020,32,4,Rain
5,1/6/2020,32,2,Sunny


### 5. Various Ways to Create a Data Frame

In [None]:
import pandas as pd

#### 1. Using CSV

In [136]:
# Read a CSV file into a Data Frame.
df = pd.read_csv('weather_data.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2020,32,6,Rain
1,1/2/2020,35,7,Sunny
2,1/3/2020,28,2,Snow
3,1/4/2020,24,7,Snow
4,1/5/2020,32,4,Rain
5,1/6/2020,32,2,Sunny


#### 2. Using Excel

In [139]:
# Read one worksheet of an Excel workbook into a Data Frame; the sheet is selected by name.
df = pd.read_excel('weather_data.xlsx', sheet_name='weather_data')
df

Unnamed: 0,Day,Temp,Wind Speed,Event
0,2020-01-01,32,6,Rain
1,2020-02-01,35,7,Sunny
2,2020-03-01,28,2,Snow
3,2020-04-01,24,7,Snow
4,2020-05-01,32,4,Rain
5,2020-06-01,32,2,Sunny


#### 3. Data Frame Using Dictionary

In [141]:
# Column-oriented input: every key becomes a column label and its list supplies that column's values.
data = {
    'day': [
        '01-01-2020', '02-01-2020', '03-01-2020',
        '04-01-2020', '05-01-2020', '06-01-2020',
    ],
    'Temp': [32, 35, 28, 24, 32, 32],
    'Wind Speed': [6, 7, 2, 7, 4, 2],
    'Event': ['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,day,Temp,Wind Speed,Event
0,01-01-2020,32,6,Rain
1,02-01-2020,35,7,Sunny
2,03-01-2020,28,2,Snow
3,04-01-2020,24,7,Snow
4,05-01-2020,32,4,Rain
5,06-01-2020,32,2,Sunny


#### 4. Data Frame Using List of Tuples

In [152]:
# Row-oriented input: each tuple is one row, so the column labels are supplied separately.
column_names = ['Day', 'Temp', 'WindSpeed', 'Event']
data = [
    ('01-01-2020', 32, 6, 'Rain'),
    ('02-01-2020', 35, 7, 'Sunny'),
    ('03-01-2020', 28, 2, 'Snow'),
    ('04-01-2020', 24, 7, 'Snow'),
    ('05-01-2020', 32, 4, 'Rain'),
    ('06-01-2020', 32, 2, 'Sunny'),
]
df = pd.DataFrame(data, columns=column_names)
df

Unnamed: 0,Day,Temp,WindSpeed,Event
0,01-01-2020,32,6,Rain
1,02-01-2020,35,7,Sunny
2,03-01-2020,28,2,Snow
3,04-01-2020,24,7,Snow
4,05-01-2020,32,4,Rain
5,06-01-2020,32,2,Sunny


#### 5. Data Frame Using List of Dictionaries

In [154]:
# Record-oriented input: each dictionary is one row, and its keys name the columns.
data = [
    {'day': '01-01-2020', 'Temp': 32, 'Wind Speed': 6, 'Event': 'Rain'},
    {'day': '02-01-2020', 'Temp': 35, 'Wind Speed': 7, 'Event': 'Sunny'},
    {'day': '03-01-2020', 'Temp': 28, 'Wind Speed': 2, 'Event': 'Snow'},
    {'day': '04-01-2020', 'Temp': 24, 'Wind Speed': 7, 'Event': 'Snow'},
    {'day': '05-01-2020', 'Temp': 32, 'Wind Speed': 4, 'Event': 'Rain'},
    {'day': '06-01-2020', 'Temp': 32, 'Wind Speed': 2, 'Event': 'Sunny'},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,day,Temp,Wind Speed,Event
0,01-01-2020,32,6,Rain
1,02-01-2020,35,7,Sunny
2,03-01-2020,28,2,Snow
3,04-01-2020,24,7,Snow
4,05-01-2020,32,4,Rain
5,06-01-2020,32,2,Sunny


### 6. Write to Excel and CSV File

In [160]:
def convert_events(cell):
    """Return 'Normal Weather' for the placeholder 'n.a.'; return any other value unchanged."""
    return 'Normal Weather' if cell == 'n.a.' else cell

# Read the NYC weather CSV, passing each value of the 'Events' column through convert_events while parsing.
df = pd.read_csv('nyc_weather.csv',converters = {'Events':convert_events})
# Write the frame to an Excel workbook, on a sheet named 'nyc_weather'.
# NOTE(review): the section heading also mentions CSV, but only an Excel file is written here.
df.to_excel('nyc_weather.xlsx',sheet_name='nyc_weather')