In [1]:
import pandas as pd
# Cap printed output at 5 rows so long Series/DataFrames are shown truncated.
pd.options.display.max_rows = 5

In [2]:
# Build a DataFrame from a dict: every key becomes a column label and every
# list supplies that column's values, one entry per row.
pd.DataFrame(
    data={
        'Yes': [50, 21],
        'No': [131, 2],
    }
)

Unnamed: 0,Yes,No
0,50,131
1,21,2


In [3]:
# DataFrame entries are not limited to integers; strings work as well.
pd.DataFrame(
    data={
        'Bob': ['I liked it.', 'It was awful'],
        'Sue': ['Pretty Good', 'Bland'],
    }
)

Unnamed: 0,Bob,Sue
0,I liked it.,Pretty Good
1,It was awful,Bland


In [4]:
# The list of row labels of a DataFrame is called its index. Passing `index`
# replaces the default 0, 1, ... labels with names of our own.
pd.DataFrame(
    data={
        'Bob': ['I liked it', 'It was awful'],
        'Sue': ['Pretty good', 'Bland'],
    },
    index=['Product A', 'Product B'],
)

Unnamed: 0,Bob,Sue
Product A,I liked it,Pretty good
Product B,It was awful,Bland


In [5]:
# SERIES
# A Series is built from a plain sequence of values and gets 0, 1, ... labels by default.
pd.Series(data=list(range(1, 7)))

0    1
1    2
    ..
4    5
5    6
Length: 6, dtype: int64

In [6]:
# Row labels can be assigned to a Series through `index`. Since a Series is a
# single column, it has one overall `name` rather than a set of column names.
pd.Series(
    data=[30, 40, 50],
    index=['2015 Sales', '2016 Sales', '2017 Sales'],
    name='Product A',
)

2015 Sales    30
2016 Sales    40
2017 Sales    50
Name: Product A, dtype: int64

In [7]:
# A Series and a DataFrame are closely related: think of a DataFrame as a collection of Series glued together

In [9]:
# READING DATA
# Load the wine reviews CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; consider a configurable data directory.
wine_reviews = pd.read_csv(
    filepath_or_buffer="/home/oktavianu/data/winemag-data-130k-v2.csv",
)

In [10]:
# `shape` is a (rows, columns) tuple, a quick way to check how large the data is
wine_reviews.shape

(129971, 14)

In [11]:
# Preview the data with head(), which returns the first rows of the DataFrame
# (five here, which is also its default).
wine_reviews.head(n=5)

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [12]:
# list the column labels of the DataFrame
wine_reviews.columns

Index(['Unnamed: 0', 'country', 'description', 'designation', 'points',
       'price', 'province', 'region_1', 'region_2', 'taster_name',
       'taster_twitter_handle', 'title', 'variety', 'winery'],
      dtype='object')

In [14]:
# index_col=0 tells pandas to use the file's first column (the one that showed up as
# 'Unnamed: 0' above, which appears to hold row numbers) as the index, instead of
# generating a new index and keeping that column as data. 'country' stays a regular column.
wine_reviews = pd.read_csv('/home/oktavianu/data/winemag-data-130k-v2.csv', index_col=0)
wine_reviews.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [15]:
# One column fewer than on the first load (13 vs. 14): the first column now serves as the index
wine_reviews.shape

(129971, 13)

In [16]:
# EXERCISE
# Create a one-row DataFrame, here from a list of rows plus explicit column labels.
fruits = pd.DataFrame([[30, 21]], columns=['Apples', 'Banana'])
fruits

Unnamed: 0,Apples,Banana
0,30,21


In [17]:
# Add row labels: `index` names each row instead of using the default 0, 1.
fruits = pd.DataFrame(
    data={
        'Apples': [35, 41],
        'Bananas': [21, 34],
    },
    index=['2017 Sales', '2018 Sales'],
)
fruits

Unnamed: 0,Apples,Bananas
2017 Sales,35,21
2018 Sales,41,34


In [18]:
# Series of ingredients: the dict keys become the index labels (in insertion
# order), the values become the data, and `name` labels the Series as a whole.
ingredients = pd.Series(
    {
        'Flour': '4 cups',
        'Milk': '1 cup',
        'Eggs': '2 large',
        'Spam': '1 can',
    },
    name='Dinner',
)
ingredients

Flour     4 cups
Milk       1 cup
Eggs     2 large
Spam       1 can
Name: Dinner, dtype: object

In [19]:
# WRITING DATA

In [20]:
# Two years of livestock counts: one inner list per row, with the row and
# column labels supplied explicitly.
animals = pd.DataFrame(
    [[12, 22], [20, 19]],
    index=['Year 1', 'Year 2'],
    columns=['Cows', 'Goats'],
)
animals

Unnamed: 0,Cows,Goats
Year 1,12,22
Year 2,20,19


In [None]:
# Save the DataFrame to disk as a CSV file in the current working directory.
# Called without a path, to_csv() only returns the CSV text as a string and
# writes nothing, so a file name is passed explicitly.
animals.to_csv('cows_and_goats.csv')