**DMA: PANDAS TUTORIAL**

In [0]:
import pandas as pd

#### Creating a new dataframe

In [0]:
# Build a DataFrame from a dict that maps each column name to its list of values
data = dict(
    state=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    year=[2000, 2001, 2002, 2001, 2002],
    population=[1.5, 1.7, 3.6, 2.4, 2.9],
)
df = pd.DataFrame.from_dict(data)

# Alternative: build a DataFrame from a list of rows plus explicit column names
#df = pd.DataFrame([[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]], columns=["col1", "col2", "col3", "col4"])

In [3]:
# Uncomment either line to inspect the Python type of the objects created above
#type(df)
#type(data)
# head() with no argument previews the first rows (all 5 rows of this small frame)
df.head()


Unnamed: 0,population,state,year
0,1.5,Ohio,2000
1,1.7,Ohio,2001
2,3.6,Ohio,2002
3,2.4,Nevada,2001
4,2.9,Nevada,2002


#### Understanding your dataframe

In [0]:
# shape and columns are attributes (no parentheses).
# Only the last expression of a cell is rendered, so the output below is
# df.columns; run df.shape on its own to see the shape.
df.shape
df.columns

Index([u'population', u'state', u'year'], dtype='object')

#### Accessing Columns

In [0]:
# Two ways to select one column: bracket lookup and attribute access.
# Both lines evaluate to the 'state' column; the cell renders only the last one.
df['state']
df.state

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
Name: state, dtype: object

#### A quick statistical profile of all the numeric columns

In [0]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the non-numeric 'state' column is left out of the result.
df.describe()

Unnamed: 0,population,year
count,5.0,5.0
mean,2.42,2001.2
std,0.864292,0.83666
min,1.5,2000.0
25%,1.7,2001.0
50%,2.4,2001.0
75%,2.9,2002.0
max,3.6,2002.0


#### Profile of a column

In [0]:
# The same summary for a single column; the result is a Series named after the column
df['population'].describe()

count    5.000000
mean     2.420000
std      0.864292
min      1.500000
25%      1.700000
50%      2.400000
75%      2.900000
max      3.600000
Name: population, dtype: float64

#### Can we get the profile of a column that is not numeric, like state?

In [0]:
# describe() can also be called on a non-numeric column. No output is recorded for
# this cell; pandas documents count/unique/top/freq for object data — run to confirm.
df['state'].describe()

### Slicing

In [0]:
#Get the first 3 rows of df
# A plain [start:stop] slice selects rows by position and excludes the stop row,
# so this returns rows 0, 1 and 2.
df[:3]

Unnamed: 0,population,state,year
0,1.5,Ohio,2000
1,1.7,Ohio,2001
2,3.6,Ohio,2002


#### Can we get a specific row of the dataframe?

In [0]:
# A one-row slice: start is included, stop is excluded, so only row 1 is returned
# (still as a DataFrame, as the tabular output shows).
df[1:2]

Unnamed: 0,population,state,year
1,1.7,Ohio,2001


#### loc and iloc

In [0]:
# loc looks rows up by index label, iloc by integer position. With this frame's
# default 0..n-1 index the two coincide, and each returns row 1 as a Series.
# Only the last expression (df.iloc[1]) is rendered.
df.loc[1]
df.iloc[1]

population     1.7
state         Ohio
year          2001
Name: 1, dtype: object

#### The difference between regular slicing and loc/iloc shows when you wish to access individual cells.

In [0]:
# loc slices rows by label; the result is not shown because it is not the last expression
df.loc[1:2]

# iloc[row, col] addresses a single cell by position: row 1, column 2.
# In the recorded run column 2 is 'year', giving 2001; the column order
# (and therefore this value) depends on how the frame was built.
df.iloc[1,2]

# Alternative to try: label-based row slice up to and including label 3
#df.loc[:3]

2001

#### With the magic of loc/iloc, slicing can work for both rows and columns

In [0]:
# loc slices rows and columns by label, and both end points are included:
# rows 0..3 and every column up to and including 'state'.
# The recorded output reflects the column order population, state, year.
df.loc[:3, :'state']

# Position-based variant to compare against (iloc excludes the stop position)
#df.iloc[:3,:2]

Unnamed: 0,population,state
0,1.5,Ohio
1,1.7,Ohio
2,3.6,Ohio
3,2.4,Nevada


### Now what if you wish to load a dataframe from a CSV file instead of building it by hand?

In [0]:
!wget http://people.ischool.berkeley.edu/~zp/course_datasets/yelp_reviews.csv
#!unzip yelp_reviews.zip
print('Dataset Downloaded: yelp_reviews.csv')
df=pd.read_csv('yelp_reviews.csv')
print(df.head())

--2019-01-31 06:54:35--  http://people.ischool.berkeley.edu/~zp/course_datasets/yelp_reviews.csv
Resolving people.ischool.berkeley.edu (people.ischool.berkeley.edu)... 128.32.78.16
Connecting to people.ischool.berkeley.edu (people.ischool.berkeley.edu)|128.32.78.16|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 376638166 (359M) [text/csv]
Saving to: ‘yelp_reviews.csv’


2019-01-31 06:56:37 (2.97 MB/s) - ‘yelp_reviews.csv’ saved [376638166/376638166]

Dataset Downloaded: yelp_reviews.csv
     type             business_id                 user_id  stars  \
0  review  mxrXVZWc6PWk81gvOVNOUw  mv7shusL4Xb6TylVYBv4CA      4   
1  review  mxrXVZWc6PWk81gvOVNOUw  0aN5QPhs-VwK2vusKG0waQ      5   
2  review  kK4AzZ0YWI-U2G-paAL7Fg  0aN5QPhs-VwK2vusKG0waQ      5   
3  review  mxrXVZWc6PWk81gvOVNOUw  1JUwyYab-uJzEx_FRd81Zg      5   
4  review  mxrXVZWc6PWk81gvOVNOUw  2Zd3Xy8hUVmZkNg7RyNjhg      4   

                                                text        date  cool_vo

In [0]:
# Leaving df.head() as the last expression renders the preview as a table,
# unlike the wrapped plain-text output that print(df.head()) produces
df.head()

Unnamed: 0,type,business_id,user_id,stars,text,date,cool_votes,useful_votes,funny_votes
0,review,mxrXVZWc6PWk81gvOVNOUw,mv7shusL4Xb6TylVYBv4CA,4,Definitely try the duck dish. I rank it amon...,2011-06-13,0,0,0
1,review,mxrXVZWc6PWk81gvOVNOUw,0aN5QPhs-VwK2vusKG0waQ,5,Big Ass Burger was awesome! Great $5 mojitos. ...,2011-06-25,1,0,0
2,review,kK4AzZ0YWI-U2G-paAL7Fg,0aN5QPhs-VwK2vusKG0waQ,5,Unbelievable sandwiches! Good service.,2011-06-25,0,0,0
3,review,mxrXVZWc6PWk81gvOVNOUw,1JUwyYab-uJzEx_FRd81Zg,5,"Awesome, awesome, awesome! My mom and sister a...",2011-07-18,1,1,0
4,review,mxrXVZWc6PWk81gvOVNOUw,2Zd3Xy8hUVmZkNg7RyNjhg,4,I had the ribs they were great. The beer sele...,2011-07-19,1,0,1


#### Slicing

In [0]:
#Slicing
# First 500 rows of the frame; not rendered because it is not the last expression

df[:500]

#Slicing a column
# Select the column first, then take its first 500 entries (index 0..499)

df['cool_votes'][:500]

0      0
1      1
2      0
3      1
4      1
5      0
6      0
7      0
8      1
9      0
10     0
11     0
12     0
13     1
14     0
15     0
16     1
17     0
18     0
19     0
20     0
21     0
22     0
23     1
24     0
25     0
26     0
27     0
28     0
29     0
      ..
470    0
471    0
472    2
473    0
474    0
475    0
476    0
477    0
478    0
479    0
480    0
481    0
482    0
483    0
484    0
485    0
486    0
487    0
488    0
489    0
490    0
491    0
492    1
493    0
494    0
495    0
496    0
497    0
498    0
499    0
Name: cool_votes, Length: 500, dtype: int64

### Filtering a dataset based on column values

In [0]:
# Keep only the reviews that received no "cool" votes
df_uncool = df[df['cool_votes'] == 0]

df_uncool.head()
# Filter a subset based on column values.
# The mask is built from the same 1500-row slice it is applied to. Indexing the
# slice with a mask computed on the full frame makes pandas reindex the mask and
# emit "UserWarning: Boolean Series key will be reindexed to match DataFrame index".
first_1500 = df[:1500]
first_1500[first_1500['cool_votes'] == 0]

  """


Unnamed: 0,type,business_id,user_id,stars,text,date,cool_votes,useful_votes,funny_votes
0,review,mxrXVZWc6PWk81gvOVNOUw,mv7shusL4Xb6TylVYBv4CA,4,Definitely try the duck dish. I rank it amon...,2011-06-13,0,0,0
2,review,kK4AzZ0YWI-U2G-paAL7Fg,0aN5QPhs-VwK2vusKG0waQ,5,Unbelievable sandwiches! Good service.,2011-06-25,0,0,0
5,review,iDYzGVIF1TDWdjHNgNjCVw,2Zd3Xy8hUVmZkNg7RyNjhg,4,"Great food, great drink. I had the crab enchil...",2011-07-19,0,0,0
6,review,dsMvINhoQbIQgSRTBv2B6g,2Zd3Xy8hUVmZkNg7RyNjhg,3,The only thing keeping this 5 stars is the pri...,2012-10-07,0,2,0
7,review,ZXRcWs5SUCvSfb8I2aLOnA,2Zd3Xy8hUVmZkNg7RyNjhg,4,My wife and I have been to many different Spas...,2010-08-24,0,0,0
9,review,GGVcdnKoRXldVEgergSF-A,2Zd3Xy8hUVmZkNg7RyNjhg,4,Good lunch place. Everything I have had is be...,2012-11-17,0,0,0
10,review,1Q_H3BLhKBymF6Se8FfZNw,2Zd3Xy8hUVmZkNg7RyNjhg,3,When you look at the price you pay you expect ...,2010-08-24,0,0,0
11,review,YKOvlBNkF4KpUP9q7x862w,2Zd3Xy8hUVmZkNg7RyNjhg,3,"Very nice restaurant, great food, great servic...",2015-08-24,0,0,0
12,review,YYy3aQoLvjLhXPkpB32uzA,2Zd3Xy8hUVmZkNg7RyNjhg,3,The food and service are OK. I will go back. ...,2014-04-22,0,0,0
14,review,sjCRI-lCh4KLO_RYQdlEeQ,2Zd3Xy8hUVmZkNg7RyNjhg,1,I went here with Friends last Friday. All my ...,2011-06-27,0,5,1


# GROUPBY!!!!

In [0]:
# Group the reviews by business. This only builds the grouping object;
# an aggregation such as mean() or count() is applied in a later step.
grouped_reviews = df.groupby(by='business_id')

In [0]:
# head() on a groupby object is applied per group (the first rows of each business),
# so the result is not limited to five rows overall, as the output shows
grouped_reviews.head()

Unnamed: 0,type,business_id,user_id,stars,text,date,cool_votes,useful_votes,funny_votes
0,review,mxrXVZWc6PWk81gvOVNOUw,mv7shusL4Xb6TylVYBv4CA,4,Definitely try the duck dish. I rank it amon...,2011-06-13,0,0,0
1,review,mxrXVZWc6PWk81gvOVNOUw,0aN5QPhs-VwK2vusKG0waQ,5,Big Ass Burger was awesome! Great $5 mojitos. ...,2011-06-25,1,0,0
2,review,kK4AzZ0YWI-U2G-paAL7Fg,0aN5QPhs-VwK2vusKG0waQ,5,Unbelievable sandwiches! Good service.,2011-06-25,0,0,0
3,review,mxrXVZWc6PWk81gvOVNOUw,1JUwyYab-uJzEx_FRd81Zg,5,"Awesome, awesome, awesome! My mom and sister a...",2011-07-18,1,1,0
4,review,mxrXVZWc6PWk81gvOVNOUw,2Zd3Xy8hUVmZkNg7RyNjhg,4,I had the ribs they were great. The beer sele...,2011-07-19,1,0,1
5,review,iDYzGVIF1TDWdjHNgNjCVw,2Zd3Xy8hUVmZkNg7RyNjhg,4,"Great food, great drink. I had the crab enchil...",2011-07-19,0,0,0
6,review,dsMvINhoQbIQgSRTBv2B6g,2Zd3Xy8hUVmZkNg7RyNjhg,3,The only thing keeping this 5 stars is the pri...,2012-10-07,0,2,0
7,review,ZXRcWs5SUCvSfb8I2aLOnA,2Zd3Xy8hUVmZkNg7RyNjhg,4,My wife and I have been to many different Spas...,2010-08-24,0,0,0
8,review,YQvg0JCGRFUkb6reMMf3Iw,2Zd3Xy8hUVmZkNg7RyNjhg,4,This place in my mind is Postinos meets Bianco...,2011-07-11,1,1,0
9,review,GGVcdnKoRXldVEgergSF-A,2Zd3Xy8hUVmZkNg7RyNjhg,4,Good lunch place. Everything I have had is be...,2012-11-17,0,0,0


In [0]:
# Average of every numeric column per business.
# numeric_only=True is required on pandas >= 2.0, where mean() raises a TypeError
# for the text columns (type, user_id, text, date) instead of silently dropping them.
# Note: this rebinds grouped_reviews from the groupby object to the aggregated DataFrame.
grouped_reviews = df.groupby('business_id').mean(numeric_only=True)

# Alternative aggregation: number of non-null values per column for each business
#grouped_reviews = df.groupby('business_id').count()

In [0]:
# grouped_reviews is now the aggregated frame: one row per business_id (the index)
grouped_reviews.head()

Unnamed: 0_level_0,stars,cool_votes,useful_votes,funny_votes
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
--5jkZ3-nUPZxUvtcbr8Uw,4.68,0.16,0.12,0.12
--AKjxBmhm9DWrh-e0hTOw,5.0,0.0,0.0,0.0
--BlvDO_RG2yElKu9XA1_g,4.6,0.1,0.3,0.1
--Ol5mVSMaW8ExtmWRUmKA,5.0,0.0,1.0,0.0
--Y_2lDOtVDioX5bwF6GIw,2.666667,0.0,0.666667,0.0


#### Performing operations on columns

In [0]:
# Element-wise arithmetic on a column: every star rating doubled, stored as a new column
df['new_stars'] = df['stars'] * 2

#### Use of numpy

In [0]:
import numpy as np

# np.mean reduces the column to one number; assigning that scalar to a new
# column repeats it on every row (see the constant new_column in the output)
cool_votes_mean = np.mean(df['cool_votes'])
df['new_column'] = cool_votes_mean

In [0]:
# Preview the frame with the two new columns. Show a bounded number of rows
# rather than evaluating the bare frame, which holds the whole reviews CSV.
df.head(10)

Unnamed: 0,type,business_id,user_id,stars,text,date,cool_votes,useful_votes,funny_votes,new_stars,new_column
0,review,mxrXVZWc6PWk81gvOVNOUw,mv7shusL4Xb6TylVYBv4CA,4,Definitely try the duck dish. I rank it amon...,2011-06-13,0,0,0,8,0.389438
1,review,mxrXVZWc6PWk81gvOVNOUw,0aN5QPhs-VwK2vusKG0waQ,5,Big Ass Burger was awesome! Great $5 mojitos. ...,2011-06-25,1,0,0,10,0.389438
2,review,kK4AzZ0YWI-U2G-paAL7Fg,0aN5QPhs-VwK2vusKG0waQ,5,Unbelievable sandwiches! Good service.,2011-06-25,0,0,0,10,0.389438
3,review,mxrXVZWc6PWk81gvOVNOUw,1JUwyYab-uJzEx_FRd81Zg,5,"Awesome, awesome, awesome! My mom and sister a...",2011-07-18,1,1,0,10,0.389438
4,review,mxrXVZWc6PWk81gvOVNOUw,2Zd3Xy8hUVmZkNg7RyNjhg,4,I had the ribs they were great. The beer sele...,2011-07-19,1,0,1,8,0.389438
5,review,iDYzGVIF1TDWdjHNgNjCVw,2Zd3Xy8hUVmZkNg7RyNjhg,4,"Great food, great drink. I had the crab enchil...",2011-07-19,0,0,0,8,0.389438
6,review,dsMvINhoQbIQgSRTBv2B6g,2Zd3Xy8hUVmZkNg7RyNjhg,3,The only thing keeping this 5 stars is the pri...,2012-10-07,0,2,0,6,0.389438
7,review,ZXRcWs5SUCvSfb8I2aLOnA,2Zd3Xy8hUVmZkNg7RyNjhg,4,My wife and I have been to many different Spas...,2010-08-24,0,0,0,8,0.389438
8,review,YQvg0JCGRFUkb6reMMf3Iw,2Zd3Xy8hUVmZkNg7RyNjhg,4,This place in my mind is Postinos meets Bianco...,2011-07-11,1,1,0,8,0.389438
9,review,GGVcdnKoRXldVEgergSF-A,2Zd3Xy8hUVmZkNg7RyNjhg,4,Good lunch place. Everything I have had is be...,2012-11-17,0,0,0,8,0.389438
