# Introduction to Pandas

A `DataFrame` is a two-dimensional table: each entry holds a value and is addressed by its row and its column.

In [8]:
import pandas as pd
# Each dict key becomes a column label; each list supplies that column's values.
pd.DataFrame(
    {
        'YES': [50, 21],
        'NO': [131, 2],
    }
)

Unnamed: 0,YES,NO
0,50,131
1,21,2


In [19]:
# A cleaner layout of the same kind of construction.
# `index` supplies the row labels, shown in the leftmost column of the output.
pd.DataFrame(
    {
        'BOB': ['I liked it', 'It was awful'],
        'ANGEL': ['It was fine', 'Distasteful'],
    },
    index=['Product A', 'Product B'],
)



Unnamed: 0,BOB,ANGEL
Product A,I liked it,It was fine
Product B,It was awful,Distasteful


In [10]:
# A Series is a single labelled sequence of values.
# `index` labels each entry; `name` labels the Series as a whole.
pd.Series(
    [30, 40, 50],
    index=['Product A', 'Product B', 'Product C'],
    name='Product A',
)


Product A    30
Product B    40
Product C    50
Name: Product A, dtype: int64

## Reading Data

In [16]:
# Disabled example: load the training CSV from a local path and inspect it.
# NOTE(review): the original commented-out lines omitted `pd.read_csv` and
# called `shape()` as a method (it is an attribute); corrected here so the
# snippet would run if re-enabled. The '~/home/...' path looks machine-specific
# -- confirm or replace with a relative path before uncommenting.
# train_data = pd.read_csv('~/home/jdev/Documents/Programming/Kaggle/Learning-Kaggle/Tutorial/train.csv', index_col = 0)
# train_data.shape
# train_data.head()

In [18]:
# One-row frame: every column is given a single-element list.
pd.DataFrame({'APPLES': [30], 'BANANA': [21]})

Unnamed: 0,APPLES,BANANA
0,30,21


## Saving to CSV

In [24]:
# One row labelled 'size'; each animal is a column.
animals = pd.DataFrame(
    {'Giraffe': ['21 feet'], 'Shark': ['10 feet']},
    index=['size'],
)
animals

Unnamed: 0,Giraffe,Shark
size,21 feet,10 feet


In [25]:
# Write the frame to a CSV file; the relative path is resolved against the
# current working directory. With default arguments the row index is written too.
animals.to_csv("Giraffe_and_Shark.csv")

# Indexing, Selecting, & Assigning

In [36]:
# Read the training data from the working directory.
# index_col = 0 makes the first CSV column the row index instead of a regular column.
data = pd.read_csv("train.csv", index_col = 0)

In [39]:
# Limit how many rows pandas prints before truncating the output.
# Use the fully-qualified option name: the bare 'max_rows' pattern matches more
# than one option key in pandas >= 1.4 (e.g. 'styler.render.max_rows') and
# raises OptionError.
pd.set_option('display.max_rows', 5)

In [40]:
# Display the frame; with the row limit set to 5, only the first and last rows are shown.
data

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1459,20,RL,68.0,9717,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,4,2010,WD,Normal,142125
1460,20,RL,75.0,9937,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,6,2008,WD,Normal,147500


In [45]:
# data[column][row label]: pick the 'LotArea' column, then the entry whose
# index label (Id) is 2. This is a label lookup, not a position -- Id 2 is the
# second row of the frame.
data['LotArea'][2]

9600

## Indexing in Pandas

Index-based selection (`iloc`): selecting data based on its numerical position in the table.

In [47]:
# iloc[0] selects the first row by position; the row comes back as a Series
# whose index is the column names.
data.iloc[0]

MSSubClass           60
MSZoning             RL
                  ...  
SaleCondition    Normal
SalePrice        208500
Name: 1, Length: 80, dtype: object

In [48]:
# Second row, third column, both by position, in a single iloc call.
# Chaining (data.iloc[1][2]) would make the second lookup an integer key on a
# string-labelled Series, which relies on pandas' deprecated positional fallback.
data.iloc[1, 2]

80.0

In [50]:
# iloc[rows, columns]: `:` keeps every row, `0` picks the first column by position.
data.iloc[:, 0]

Id
1       60
2       20
        ..
1459    20
1460    20
Name: MSSubClass, Length: 1460, dtype: int64

In [51]:
# First three rows (positions 0, 1, 2 -- the end of the slice is excluded) of the first column.
data.iloc[:3,0]

Id
1    60
2    20
3    60
Name: MSSubClass, dtype: int64

In [52]:
# Negative positions count from the end: this is the tenth row from the bottom.
data.iloc[-10]

MSSubClass           90
MSZoning             RL
                  ...  
SaleCondition    Normal
SalePrice        136000
Name: 1451, Length: 80, dtype: object

Label-based selection (`loc`): selecting data based on its index label rather than its position.

In [60]:
# Display the frame again to see the index labels (the Id column) that `loc` matches against.
data

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1459,20,RL,68.0,9717,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,4,2010,WD,Normal,142125
1460,20,RL,75.0,9937,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,6,2008,WD,Normal,147500


In [63]:
# loc[2] selects the row whose index label (Id) equals 2, not the row at position 2.
data.loc[2]

MSSubClass           20
MSZoning             RL
                  ...  
SaleCondition    Normal
SalePrice        181500
Name: 2, Length: 80, dtype: object

### Manipulating Index

`set_index` returns a copy of the DataFrame that uses the given column as its row index.

In [66]:
# Use the MSSubClass column as the row index. The result is only displayed, not
# assigned, so `data` itself keeps its original index.
data.set_index("MSSubClass")

Unnamed: 0_level_0,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
MSSubClass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,Gtl,...,0,,,,0,2,2008,WD,Normal,208500
20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,Gtl,...,0,,,,0,5,2007,WD,Normal,181500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20,RL,68.0,9717,Pave,,Reg,Lvl,AllPub,Inside,Gtl,...,0,,,,0,4,2010,WD,Normal,142125
20,RL,75.0,9937,Pave,,Reg,Lvl,AllPub,Inside,Gtl,...,0,,,,0,6,2008,WD,Normal,147500


### Conditional Selection

In [68]:
# Load the wine reviews. This rebinds `data`, so the housing frame loaded
# earlier is no longer reachable under this name.
# NOTE(review): no index_col is given, so the CSV's unnamed first column is
# loaded as a regular 'Unnamed: 0' column; index_col=0 would use it as the
# index instead -- confirm which is intended.
data = pd.read_csv("winemag-data_first150k.csv")
data

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
...,...,...,...,...,...,...,...,...,...,...,...
150928,150928,France,"A perfect salmon shade, with scents of peaches...",Grand Brut Rosé,90,52.0,Champagne,Champagne,,Champagne Blend,Gosset
150929,150929,Italy,More Pinot Grigios should taste like this. A r...,,90,15.0,Northeastern Italy,Alto Adige,,Pinot Grigio,Alois Lageder


In [78]:
# Select a single column by name; the result is a Series.
data['description']

0         This tremendous 100% varietal wine hails from ...
1         Ripe aromas of fig, blackberry and cassis are ...
                                ...                        
150928    A perfect salmon shade, with scents of peaches...
150929    More Pinot Grigios should taste like this. A r...
Name: description, Length: 150930, dtype: object

In [84]:
# First ten entries of the description column, sliced explicitly by position.
data['description'].iloc[:10]

0    This tremendous 100% varietal wine hails from ...
1    Ripe aromas of fig, blackberry and cassis are ...
                           ...                        
8    This re-named vineyard was formerly bottled as...
9    The producer sources from two blocks of the vi...
Name: description, Length: 10, dtype: object

In [82]:
# First row by position, returned as a Series indexed by column name.
data.iloc[0]

Unnamed: 0                     0
country                       US
                     ...        
variety       Cabernet Sauvignon
winery                     Heitz
Name: 0, Length: 11, dtype: object

In [87]:
# First row, first five columns, both by position, in one iloc call.
data.iloc[0, :5]

Unnamed: 0                                                     0
country                                                       US
description    This tremendous 100% varietal wine hails from ...
designation                                    Martha's Vineyard
points                                                        96
Name: 0, dtype: object

In [95]:
# Passing a list of positions to iloc selects exactly those rows, in the order given.
data.iloc[[1,2,3,4,5,8]]

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
1,1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
...,...,...,...,...,...,...,...,...,...,...,...
5,5,Spain,"Deep, dense and pure from the opening bell, th...",Numanthia,95,73.0,Northern Spain,Toro,,Tinta de Toro,Numanthia
8,8,US,This re-named vineyard was formerly bottled as...,Silice,95,65.0,Oregon,Chehalem Mountains,Willamette Valley,Pinot Noir,Bergström
