# INCLASS NOTEBOOKS DAY-03

In [1]:
import pandas as pd

## Merge

In [2]:
left_frame = pd.DataFrame({'key': range(5), 
                           'left_value': ['a', 'b', 'c', 'd', 'e']})
right_frame = pd.DataFrame({'key': range(2, 7), 
                           'right_value': ['f', 'g', 'h', 'i', 'j']})
print(left_frame)
print('\n')
print(right_frame)

   key left_value
0    0          a
1    1          b
2    2          c
3    3          d
4    4          e


   key right_value
0    2           f
1    3           g
2    4           h
3    5           i
4    6           j


##  Inner Join

In [3]:
pd.merge(left_frame, right_frame, on='key', how='inner')

Unnamed: 0,key,left_value,right_value
0,2,c,f
1,3,d,g
2,4,e,h


## Left Join

In [4]:
pd.merge(left_frame, right_frame, on='key', how='left')

Unnamed: 0,key,left_value,right_value
0,0,a,
1,1,b,
2,2,c,f
3,3,d,g
4,4,e,h


## Right Join

In [5]:
pd.merge(left_frame, right_frame, on='key', how='right')

Unnamed: 0,key,left_value,right_value
0,2,c,f
1,3,d,g
2,4,e,h
3,5,,i
4,6,,j


## Outer Join

In [6]:
pd.merge(left_frame, right_frame, on='key', how='outer')

Unnamed: 0,key,left_value,right_value
0,0,a,
1,1,b,
2,2,c,f
3,3,d,g
4,4,e,h
5,5,,i
6,6,,j


## Merge on Index

Join can be used to combine columns of 2 dataframes that have different index values into a signle dataframe

The one difference between merge and join is that, merge uses common columns to combine two dataframes, whereas join uses the row index to join two dataframes

In [7]:
Table1 = pd.DataFrame({'Q1': ['101', '102', '103'],
                     'Q2': ['201', '202', '203']},
                      index=['I0', 'I1', 'I2']) 

Table2 = pd.DataFrame({'Q3': ['301', '302', '303'],
                    'Q4': ['401', '402', '403']},
                      index=['I0', 'I2', 'I3'])

In [8]:
Table1.join(Table2)

Unnamed: 0,Q1,Q2,Q3,Q4
I0,101,201,301.0,401.0
I1,102,202,,
I2,103,203,302.0,402.0


In [9]:
Table1.join(Table2, how='outer')

Unnamed: 0,Q1,Q2,Q3,Q4
I0,101.0,201.0,301.0,401.0
I1,102.0,202.0,,
I2,103.0,203.0,302.0,402.0
I3,,,303.0,403.0


## Concatenate

In [10]:
pd.concat([left_frame, right_frame])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,key,left_value,right_value
0,0,a,
1,1,b,
2,2,c,
3,3,d,
4,4,e,
0,2,,f
1,3,,g
2,4,,h
3,5,,i
4,6,,j


In [12]:
pd.concat([left_frame, right_frame], axis=1)

Unnamed: 0,key,left_value,key.1,right_value
0,0,a,2,f
1,1,b,3,g
2,2,c,4,h
3,3,d,5,i
4,4,e,6,j


## Combining

## Lets combine the two dataframes 

In [13]:
df=left_frame.append(right_frame, sort=True)

In [14]:
df

Unnamed: 0,key,left_value,right_value
0,0,a,
1,1,b,
2,2,c,
3,3,d,
4,4,e,
0,2,,f
1,3,,g
2,4,,h
3,5,,i
4,6,,j


## Ignoring indexes on the concatenation axis

In [15]:
result = left_frame.append(right_frame, ignore_index=True)

In [16]:
result

Unnamed: 0,key,left_value,right_value
0,0,a,
1,1,b,
2,2,c,
3,3,d,
4,4,e,
5,2,,f
6,3,,g
7,4,,h
8,5,,i
9,6,,j


## Reshaping

## Lets `Reshape` the data frame using 'melt' function

In [0]:
import pandas as pd
data = pd.read_csv("weather.csv")
data

Unnamed: 0,day,chicago,chennai,berlin
0,Monday,32,75,41
1,Tuesday,30,77,43
2,Wednesday,28,75,45
3,Thursday,22,82,38
4,Friday,30,83,30
5,Saturday,20,81,45
6,Sunday,25,77,47


Each column above represents the temperature at three different cities from monday to friday

## Lets transform the above table into three columns(Day, city and temperature)

In [0]:
df1=pd.melt(data, id_vars = ["day"], var_name ="city", value_name="temperature")
df1

Unnamed: 0,day,city,temperature
0,Monday,chicago,32
1,Tuesday,chicago,30
2,Wednesday,chicago,28
3,Thursday,chicago,22
4,Friday,chicago,30
5,Saturday,chicago,20
6,Sunday,chicago,25
7,Monday,chennai,75
8,Tuesday,chennai,77
9,Wednesday,chennai,75


## Lets filter the data only for chicago

In [0]:
df1[df1["city"]=="chicago"]

Unnamed: 0,day,city,temperature
0,Monday,chicago,32
1,Tuesday,chicago,30
2,Wednesday,chicago,28
3,Thursday,chicago,22
4,Friday,chicago,30
5,Saturday,chicago,20
6,Sunday,chicago,25


## Pivoting

In [0]:
import pandas as pd
import numpy as np
df = pd.read_csv("humidity.csv")
df

Unnamed: 0,date,city,temperature,humidity
0,5/1/2017,new york,65,56
1,5/2/2017,new york,66,58
2,5/3/2017,new york,68,60
3,5/1/2017,mumbai,75,80
4,5/2/2017,mumbai,78,83
5,5/3/2017,mumbai,82,85
6,5/1/2017,beijing,80,26
7,5/2/2017,beijing,77,30
8,5/3/2017,beijing,79,35


## Lets reshape our data : Row = city and date , Column = Temperatre and Humidity

In [0]:
df.pivot(index='date', columns='city')

Unnamed: 0_level_0,temperature,temperature,temperature,humidity,humidity,humidity
city,beijing,mumbai,new york,beijing,mumbai,new york
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
5/1/2017,80,75,65,26,80,56
5/2/2017,77,78,66,30,83,58
5/3/2017,79,82,68,35,85,60


## Lets reshape our data : Row = city and humidity , Column = Temperatre and date

In [0]:
df.pivot(index='humidity',columns='city')

Unnamed: 0_level_0,date,date,date,temperature,temperature,temperature
city,beijing,mumbai,new york,beijing,mumbai,new york
humidity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
26,5/1/2017,,,80.0,,
30,5/2/2017,,,77.0,,
35,5/3/2017,,,79.0,,
56,,,5/1/2017,,,65.0
58,,,5/2/2017,,,66.0
60,,,5/3/2017,,,68.0
80,,5/1/2017,,,75.0,
83,,5/2/2017,,,78.0,
85,,5/3/2017,,,82.0,


## Lets display the dataframe only for humidity

In [0]:
df.pivot(index='date',columns='city', values= "humidity")

city,beijing,mumbai,new york
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5/1/2017,26,80,56
5/2/2017,30,83,58
5/3/2017,35,85,60


## Pivoting Table

In [0]:
df = pd.read_csv("Humidity1.csv")
df

Unnamed: 0,date,city,temperature,humidity
0,05-01-2017,new york,65,56
1,05-01-2017,new york,61,54
2,05-02-2017,new york,70,60
3,05-02-2017,new york,71,62
4,05-01-2017,mumbai,75,80
5,05-01-2017,mumbai,78,83
6,05-02-2017,mumbai,82,85
7,05-02-2017,mumbai,80,26


In the above table, the temperature is given for morning and evening for the same date.

## Lets create a data frame which contains the average temperaure for all the days

In [0]:
df.pivot_table(index="city",columns="date")

Unnamed: 0_level_0,humidity,humidity,temperature,temperature
date,05-01-2017,05-02-2017,05-01-2017,05-02-2017
city,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
mumbai,81.5,55.5,76.5,81.0
new york,55.0,61.0,63.0,70.5


## Duplicating 

In [0]:
df = pd.read_csv("Humidity1.csv")
df

Unnamed: 0,date,city,temperature,humidity
0,05-01-2017,new york,65,56
1,05-01-2017,new york,61,54
2,05-02-2017,new york,70,60
3,05-02-2017,new york,71,62
4,05-01-2017,mumbai,75,80
5,05-01-2017,mumbai,78,83
6,05-02-2017,mumbai,82,85
7,05-02-2017,mumbai,80,26


## Lets check which rows are duplicated

In [0]:
df.duplicated()

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
dtype: bool

False means no duplication. We checked for the entire row. None of the row is matching with each other completely.

## Lets check for the duplication in certain columns

In [0]:
df.duplicated(['date'])

0    False
1     True
2    False
3     True
4     True
5     True
6     True
7     True
dtype: bool

## Lets check for the duplication in two columns together?

In [0]:
df.duplicated(['date','city'])

0    False
1     True
2    False
3     True
4    False
5     True
6    False
7     True
dtype: bool

In [0]:
df.duplicated(['date','city'], keep='last')

0     True
1    False
2     True
3    False
4     True
5    False
6     True
7    False
dtype: bool

Keep=last will throw false when the row is repeated from second time.

## Lets drop duplicates by considering city

In [0]:
df.drop_duplicates(['city'])

Unnamed: 0,date,city,temperature,humidity
0,05-01-2017,new york,65,56
4,05-01-2017,mumbai,75,80


## Mapping 

In [0]:
Season = {'new york':'winter', 'mumbai':'summer'}

## Now lets map the above dataframe "season" in our previous dataframe.

In [0]:
df['Season']=df['city'].map(Season)
df

Unnamed: 0,date,city,temperature,humidity,Season
0,05-01-2017,new york,65,56,winter
1,05-01-2017,new york,61,54,winter
2,05-02-2017,new york,70,60,winter
3,05-02-2017,new york,71,62,winter
4,05-01-2017,mumbai,75,80,summer
5,05-01-2017,mumbai,78,83,summer
6,05-02-2017,mumbai,82,85,summer
7,05-02-2017,mumbai,80,26,summer


## Replacing

Replace is used to replace any value in the dataframe. Lets replace the city new york with some other city

In [0]:
df['city'].replace('new york','Italy')

0     Italy
1     Italy
2     Italy
3     Italy
4    mumbai
5    mumbai
6    mumbai
7    mumbai
Name: city, dtype: object

## Renaming 

## Lets rename the column 'temperature' with 'temp'

In [0]:
df.rename(columns= {'temperature':'temp'})

Unnamed: 0,date,city,temp,humidity,Season
0,05-01-2017,new york,65,56,winter
1,05-01-2017,new york,61,54,winter
2,05-02-2017,new york,70,60,winter
3,05-02-2017,new york,71,62,winter
4,05-01-2017,mumbai,75,80,summer
5,05-01-2017,mumbai,78,83,summer
6,05-02-2017,mumbai,82,85,summer
7,05-02-2017,mumbai,80,26,summer


## Summary Statistics

In [0]:
import pandas as pd 

import csv
data = pd.read_csv("wine.csv", encoding="latin-1")

## Lets have a brief look at the first four rows of the data in table

In [0]:
data.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",VulkÃ Bianco,87,,Sicily & Sardinia,Etna,,Kerin OâKeefe,@kerinokeefe,Nicosia 2013 VulkÃ Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwineÂ,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwineÂ,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


#head function with no arguments gets the first five rows of data from the data frame so the output will be as above

## Head function in python with arguments

In [0]:
data.head(8)

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",VulkÃ Bianco,87,,Sicily & Sardinia,Etna,,Kerin OâKeefe,@kerinokeefe,Nicosia 2013 VulkÃ Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwineÂ,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwineÂ,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
5,5,Spain,Blackberry and raspberry aromas show a typical...,Ars In Vitro,87,15.0,Northern Spain,Navarra,,Michael Schachner,@wineschach,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot,Tandem
6,6,Italy,"Here's a bright, informal red that opens with ...",Belsito,87,16.0,Sicily & Sardinia,Vittoria,,Kerin OâKeefe,@kerinokeefe,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato,Terre di Giurfo
7,7,France,This dry and restrained wine offers spice in p...,,87,24.0,Alsace,Alsace,,Roger Voss,@vossroger,Trimbach 2012 Gewurztraminer (Alsace),GewÃ¼rztraminer,Trimbach


head function with specified N arguments, gets the first N rows of data from the data frame so the output will be as above

In [0]:
data.tail()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
129966,129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr SpÃ¤tlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben MÃ¼ller-Burggraef) 2013...,Riesling,Dr. H. Thanisch (Erben MÃ¼ller-Burggraef)
129967,129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwineÂ,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,GewÃ¼rztraminer,Domaine Gresser
129969,129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss
129970,129970,France,"Big, rich and off-dry, this is powered by inte...",Lieu-dit Harth CuvÃ©e Caroline,90,21.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Schoffit 2012 Lieu-dit Harth CuvÃ©e Ca...,GewÃ¼rztraminer,Domaine Schoffit


tail function with no arguments gets the last five rows of data from the data frame so the output will be as above

## Tail function in python with arguments

In [0]:
data.tail(8)

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
129963,129963,Israel,"A bouquet of black cherry, tart cranberry and ...",Oak Aged,90,20.0,Galilee,,,Mike DeSimone,@worldwineguys,Dalton 2012 Oak Aged Cabernet Sauvignon (Galilee),Cabernet Sauvignon,Dalton
129964,129964,France,"Initially quite muted, this wine slowly develo...",Domaine Saint-RÃ©my Herrenweg,90,,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Ehrhart 2013 Domaine Saint-RÃ©my Herre...,GewÃ¼rztraminer,Domaine Ehrhart
129965,129965,France,"While it's rich, this beautiful dry wine also ...",Seppi Landmann VallÃ©e Noble,90,28.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine RieflÃ©-Landmann 2013 Seppi Landmann V...,Pinot Gris,Domaine RieflÃ©-Landmann
129966,129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr SpÃ¤tlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben MÃ¼ller-Burggraef) 2013...,Riesling,Dr. H. Thanisch (Erben MÃ¼ller-Burggraef)
129967,129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwineÂ,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,GewÃ¼rztraminer,Domaine Gresser
129969,129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss
129970,129970,France,"Big, rich and off-dry, this is powered by inte...",Lieu-dit Harth CuvÃ©e Caroline,90,21.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Schoffit 2012 Lieu-dit Harth CuvÃ©e Ca...,GewÃ¼rztraminer,Domaine Schoffit


tail function with specified N arguments, gets the last N rows of data from the data frame so the output will be as above

## describe() Function

## Lets get the Summary Statistic of the numeric columns:

In [0]:
data.describe()

Unnamed: 0.1,Unnamed: 0,points,price
count,129971.0,129971.0,120975.0
mean,64985.0,88.447138,35.363389
std,37519.540256,3.03973,41.022218
min,0.0,80.0,4.0
25%,32492.5,86.0,17.0
50%,64985.0,88.0,25.0
75%,97477.5,91.0,42.0
max,129970.0,100.0,3300.0


describe() Function gives the mean, std and IQR values. It excludes character column and calculate summary statistics only for numeric columns

## Lets get the Summary Statistic of the character columns:

In [0]:
data.describe(include=['object'])

Unnamed: 0,country,description,designation,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
count,129908,129971,92506,129908,108724,50511,103727,98758,129971,129970,129971
unique,43,119955,37979,425,1229,17,19,15,118840,707,16757
top,US,"Cigar box, cafÃ© au lait, and dried tobacco ar...",Reserve,California,Napa Valley,Central Coast,Roger Voss,@vossroger,Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Pinot Noir,Wines & Winemakers
freq,54504,3,2009,36247,4480,11065,25514,25514,11,13272,222


describe() Function with an argument named include along with value object i.e include=’object’ gives the summary statistics of the character columns.


## Lets get the Summary Statistic of all the columns

In [0]:
data.describe(include='all')

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
count,129971.0,129908,129971,92506,129971.0,120975.0,129908,108724,50511,103727,98758,129971,129970,129971
unique,,43,119955,37979,,,425,1229,17,19,15,118840,707,16757
top,,US,"Cigar box, cafÃ© au lait, and dried tobacco ar...",Reserve,,,California,Napa Valley,Central Coast,Roger Voss,@vossroger,Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Pinot Noir,Wines & Winemakers
freq,,54504,3,2009,,,36247,4480,11065,25514,25514,11,13272,222
mean,64985.0,,,,88.447138,35.363389,,,,,,,,
std,37519.540256,,,,3.03973,41.022218,,,,,,,,
min,0.0,,,,80.0,4.0,,,,,,,,
25%,32492.5,,,,86.0,17.0,,,,,,,,
50%,64985.0,,,,88.0,25.0,,,,,,,,
75%,97477.5,,,,91.0,42.0,,,,,,,,


describe() Function with include=’all’ gives the summary statistics of all the columns.

## Lets get the basic information about our data

In [0]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129971 entries, 0 to 129970
Data columns (total 14 columns):
Unnamed: 0               129971 non-null int64
country                  129908 non-null object
description              129971 non-null object
designation              92506 non-null object
points                   129971 non-null int64
price                    120975 non-null float64
province                 129908 non-null object
region_1                 108724 non-null object
region_2                 50511 non-null object
taster_name              103727 non-null object
taster_twitter_handle    98758 non-null object
title                    129971 non-null object
variety                  129970 non-null object
winery                   129971 non-null object
dtypes: float64(1), int64(2), object(11)
memory usage: 13.9+ MB


##  Reading in data and looking at the number of rows and columns using shape function

In [2]:
import pandas as pd

In [4]:
NEIC = pd.read_csv('earthquake.csv',sep = ',')
NEIC.shape

(23412, 21)

## Identifying the unique factors of Earthquakes.


In [9]:
from numpy import *
print(unique(NEIC.Type))
print(NEIC['Magnitude Type'].unique())

['Earthquake' 'Explosion' 'Nuclear Explosion' 'Rock Burst']
['MW' 'ML' 'MH' 'MS' 'MB' 'MWC' 'MD' nan 'MWB' 'MWW' 'MWR']


##  Lets identify the minimum, maximum and average of the Magnitude of the earthquake

In [6]:
minimum=NEIC["Magnitude"].min()
maximum=NEIC["Magnitude"].max()
average=NEIC["Magnitude"].mean()

print("Minimum: " ,minimum)
print("Maximum: " ,maximum)
print("Average: " ,average)

Minimum:  5.5
Maximum:  9.1
Average:  5.882530753460003


## finding Highly effected areas , assuming the one with Magnitude >8

In [7]:
highly_effected=NEIC[NEIC["Magnitude"]>=8]
print(highly_effected)

             Date      Time  Latitude  Longitude        Type   Depth  \
11     01/24/1965  00:11:17   -2.6080   125.9520  Earthquake   20.00   
16     02/04/1965  05:01:22   51.2510   178.7150  Earthquake   30.30   
538    10/17/1966  21:42:00  -10.6650   -78.2280  Earthquake   40.00   
911    05/16/1968  00:49:02   40.8600   143.4350  Earthquake   29.90   
1663   07/31/1970  17:08:05   -1.5970   -72.5320  Earthquake  644.80   
1983   07/14/1971  06:11:30   -5.5240   153.8500  Earthquake   40.00   
2008   07/26/1971  01:23:22   -4.8170   153.1720  Earthquake   40.00   
2530   12/02/1972  00:19:52    6.4050   126.6400  Earthquake   60.00   
3770   01/14/1976  16:47:34  -28.4270  -177.6570  Earthquake   33.00   
7415   03/03/1985  22:47:07  -33.1350   -71.8710  Earthquake   33.00   
7699   09/19/1985  13:17:47   18.1900  -102.5330  Earthquake   27.90   
7959   05/07/1986  22:47:11   51.5200  -174.7760  Earthquake   33.00   
9484   05/23/1989  10:54:46  -52.3410   160.5680  Earthquake   1

## skewness and kurtosis of Depth.

In [12]:
print("Skewness: %f" % NEIC['Depth'].skew())
print("Kurtosis: %f" % NEIC['Depth'].kurt()), encoding="latin-1"

Skewness: 3.290683
Kurtosis: 10.456851


In [20]:
data = pd.read_csv("wine.csv")

In [21]:
data.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [22]:
data.tail()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
129966,129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef)
129967,129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser
129969,129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss
129970,129970,France,"Big, rich and off-dry, this is powered by inte...",Lieu-dit Harth Cuvée Caroline,90,21.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Schoffit 2012 Lieu-dit Harth Cuvée Car...,Gewürztraminer,Domaine Schoffit


## Lets find out the mean of the points

In [23]:
data['points'].mean() #Mean of the dataframe:

88.44713820775404

## Lets calculate the median of the specific Column

In [24]:
data['points'].median()

88.0

## Lets calculate the mode of the specific column

In [25]:
data['points'].mode()

0    88
dtype: int64

## Lets calculate the standard deviation of a data frame

In [26]:
data.std()

Unnamed: 0    37519.540256
points            3.039730
price            41.022218
dtype: float64

## Lets calculate the standard deviation of the data frame column wise

In [27]:
data.std(axis=0)

Unnamed: 0    37519.540256
points            3.039730
price            41.022218
dtype: float64

## Lets calculate the standard deviation of a specefic column "points"

In [28]:
data.loc[:,"points"].std()

3.0397302029162336

-----------------------

# ------------------------------------- HAPPY LEARNING --------------------------------------------

---------------------------------