In [1]:
import numpy as np
import pandas as pd

In [2]:
# A list of strings becomes a Series with the default 0..n-1 index.
animals = list(('tiger', 'bear', 'moose'))
pd.Series(data=animals)


0    tiger
1     bear
2    moose
dtype: object

In [3]:
# A list of integers is stored with an integer dtype.
nums = list(range(1, 4))
pd.Series(data=nums)

0    1
1    2
2    3
dtype: int64

In [4]:
# A missing entry among strings stays as None; the Series keeps dtype object.
animals = ['tiger', 'bear'] + [None]
pd.Series(data=animals)

0    tiger
1     bear
2     None
dtype: object

In [5]:
# A missing entry among numbers is converted to NaN, which forces a float dtype.
nums = [1, 2] + [None]
x = pd.Series(data=nums)
x


0    1.0
1    2.0
2    NaN
dtype: float64

In [6]:
# `x` was created without `name=`, so its name attribute is None.
# Only the last expression of a cell is displayed, so the output is the type.
x.name
type(x.name)

NoneType

In [7]:
# Pass `name=` to label the Series; the name is shown in the footer of its repr.
new_nums = list(range(1, 5))
y = pd.Series(data=new_nums, name='my_numbers')
y

0    1
1    2
2    3
3    4
Name: my_numbers, dtype: int64

In [8]:
y.name

'my_numbers'

In [9]:
# Give the Series explicit labels instead of the default 0..n-1 index.
animals = pd.Series(
    data=['tiger', 'bear', 'moose'],
    index=['india', 'usa', 'canada'],
)
animals

india     tiger
usa        bear
canada    moose
dtype: object

In [10]:
# Build a Series from a dict: the keys become the index labels, the values the data.
sports = dict(
    Archery='Bhutan',
    Golf='Scotland',
    Sumo='Japan',
    Taekwondo='South Korea',
)
pd.Series(data=sports)

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [11]:
# A None key is displayed as NaN in the index.
# A None value among strings is kept as None (the Series stays dtype object).
sports = dict([
    ('Archery', 'Bhutan'),
    ('Golf', 'Scotland'),
    (None, 'Japan'),
    ('Taekwondo', None),
])
pd.Series(data=sports)

Archery        Bhutan
Golf         Scotland
NaN             Japan
Taekwondo        None
dtype: object

In [12]:
"""
If "keys" and "values" both are numbers and any value from keys/values is absent then both stored as NaN
and dtype is set as "float64"
"""

num_dict = {1:1,
            2:2,
            None:3,
            4:None}
pd.Series(num_dict)

1      1.0
2      2.0
NaN    3.0
4      NaN
dtype: float64

# Querying a Series

In [13]:
# Sample Series (sport -> country) used by the lookup examples that follow.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [14]:
# `iloc` (by position) and `loc` (by label) are indexer attributes, not methods,
# so they are subscripted with square brackets:
#   wrong: s.iloc(2), s.loc('Golf')   -- parentheses do not perform a lookup
#   right: s.iloc[2], s.loc['Golf']

# Element at position 2 (the third entry).
s.iloc[2]

'Japan'

In [15]:
s.loc['Golf']

'Scotland'

In [16]:
# Positional lookup. Plain `s[2]` only worked through a fallback that treats an
# integer key as a position when the index holds strings. That fallback is
# deprecated (pandas 2.1+ emits a FutureWarning and will treat integer keys
# strictly as labels), so request position 2 explicitly.
s.iloc[2]

'Japan'

In [17]:
s['Golf']

'Scotland'

In [18]:
# Index labels do not have to share a type: here a tuple, a string and an integer
# are used side by side as labels.
test = pd.Series(['one', 'two', 'three'], index=[(1, 2, 3), 'b', 3])
test

(1, 2, 3)      one
b              two
3            three
dtype: object

In [19]:
# Look each label up with plain [] ...
# (only the value of the cell's last expression is displayed)
test['b']
test[3]
test[(1, 2, 3)]

# ... and again with .loc.
test.loc['b']
test.loc[3]
# NOTE(review): the tuple label is looked up with plain [] here, not .loc --
# presumably because .loc would read a tuple as a multi-axis key; confirm.
test[(1, 2, 3)]

'one'

In [20]:
"""
If we set the numerical indexes and then try to get the series using the positional indexes then
we will get an error because Pandas will get confused whether to use positional-indexes or label-indexes 
"""

num_index_sports = {1:'China', 2:'India', 3:'US', 4:'Indonesia', 5:'Brazil'}
num_index_series = pd.Series(num_index_sports)
num_index_series

1        China
2        India
3           US
4    Indonesia
5       Brazil
dtype: object

In [21]:
"""
    Here if we try to get the first element using the positional-index 0,
    then we will get an error because it is now confused between
    numeric-label-indexes & default-numeric-positonal-indexes
"""
# num_index_series[0]
num_index_series.iloc[0]

'China'

In [22]:
# 10,000 random integers drawn from [0, 1000), used as sample data for the
# timing cells. `numpy` is imported once in the notebook's import cell rather
# than here, so every dependency is visible at the top.
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
rand_nums.head()

0    504
1    549
2    310
3     35
4    895
dtype: int64

In [23]:
%%timeit -n 100
# Baseline: add the values up with an explicit Python loop.
# The Series is rebuilt inside the timed body, so the measurement also includes
# the cost of generating the random numbers.
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
total = 0
for item in rand_nums:
    total += item

1 ms ± 11.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [24]:
%%timeit -n 100
# Same total via Python's built-in sum(), which still walks the Series element
# by element; the vectorized alternative would be rand_nums.sum().
# As above, the Series construction is part of the timed body.
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
total = sum(rand_nums)

642 µs ± 50.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [25]:
# Draw a fresh set of 10,000 random integers in [0, 1000) and preview the first five.
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
rand_nums.head()

0    195
1    866
2     85
3    437
4    371
dtype: int64

In [26]:
%%timeit -n 10
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
for label, value in rand_nums.iteritems():
    rand_nums.at[label] = value+2

121 ms ± 1.55 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [27]:
%%timeit -n 10
# Fast way: a single vectorized operation adds 2 to every element at once.
# The Series is rebuilt inside the timed body, matching the loop version above it.
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
rand_nums += 2

The slowest run took 29.47 times longer than the fastest. This could mean that an intermediate result is being cached.
1.67 ms ± 3.28 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Add new data 

In [28]:
# Start from a small integer Series with the default 0..2 index.
s = pd.Series(data=list(range(1, 4)))
s

0    1
1    2
2    3
dtype: int64

In [29]:
# Assigning through .loc to a label that does not exist yet adds a new entry.
# Mixing a string label/value into the integer Series turns it into dtype object.
s.loc['name'] = 'Mohit'
s

0           1
1           2
2           3
name    Mohit
dtype: object

In [30]:
# Two Series to combine: one with unique labels, one where every label is 'Cricket'.
original_sports = pd.Series(
    data=['Bhutan', 'Scotland', 'Japan', 'South Korea'],
    index=['Archery', 'Golf', 'Sumo', 'Taekwondo'],
)
cricket_loving = pd.Series(
    data=['Australia', 'Barbados', 'Pakistan', 'England'],
    index=['Cricket'] * 4,
)

In [31]:
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [32]:
cricket_loving

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [33]:
# Stack the two Series end to end into a new Series.
# `Series.append` was removed in pandas 2.0; `pd.concat` is the supported
# replacement and likewise keeps every label, duplicates included.
all_countries = pd.concat([original_sports, cricket_loving])
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [34]:
# Using integer labels such as 5, 6, 7, 8 instead of 'Cricket' would work just as well.

# Important: combining the two Series produced a new Series; neither of the
# original Series was modified.

original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [35]:
cricket_loving

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [36]:
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [37]:

# Combining again without assigning the result: a new Series is returned and
# displayed, and `original_sports` itself is left as it was.
# (`Series.append` was removed in pandas 2.0, hence `pd.concat`.)
pd.concat([original_sports, cricket_loving])

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [38]:
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [39]:
# 'Golf' labels exactly one entry, so the lookup returns the value itself (a str).
type(all_countries.loc['Golf'])

str

In [40]:
# 'Cricket' labels several entries, so the lookup returns all of them as a Series.
type(all_countries['Cricket'])

pandas.core.series.Series

# Data Frame

In [41]:
# One Series per purchase; the shared labels become the DataFrame's columns.
purchase_1, purchase_2, purchase_3 = (
    pd.Series(record, index=['Name', 'Item Purchased', 'Cost'])
    for record in (
        ['Chris', 'Dog Food', 22.50],
        ['Kevyn', 'Cat Litter', 2.5],
        ['Vinod', 'Bird Seed', 5.00],
    )
)

# Each Series becomes one row; 'Store 1' is deliberately used for two rows.
df = pd.DataFrame(
    [purchase_1, purchase_2, purchase_3],
    index=['Store 1', 'Store 1', 'Store 3'],
)

df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Cat Litter,2.5
Store 3,Vinod,Bird Seed,5.0


There are many ways to extract data from a DataFrame, but:
1. To pull out a row, use the `loc`/`iloc` attributes, which select rows by label or by position.
2. To pull out a column, use the indexing operator `[]` directly on the DataFrame; pandas reserves it
for column selection.

In [42]:
# Recommended way to extract a column: the indexing operator with the column label.
# The result is a Series named after the column.
df['Item Purchased']

Store 1      Dog Food
Store 1    Cat Litter
Store 3     Bird Seed
Name: Item Purchased, dtype: object

In [43]:
# This is chained indexing: two indexing operations applied back to back,
# e.g. data_frame[row_selector][column_label].
# It can read data, but avoid it where possible: the first step builds an
# intermediate object that may be a copy rather than the original DataFrame,
# which is slower and means an assignment made through the chain may never
# reach the original data.

df[:]['Item Purchased']

Store 1      Dog Food
Store 1    Cat Litter
Store 3     Bird Seed
Name: Item Purchased, dtype: object

In [44]:
# Also chained indexing: .loc[:] selects every row, then [] selects the column.
df.loc[:]['Item Purchased']

Store 1      Dog Food
Store 1    Cat Litter
Store 3     Bird Seed
Name: Item Purchased, dtype: object

In [45]:
# Chained indexing again: .iloc[:] selects every row, then [] selects the column.
df.iloc[:]['Item Purchased']

Store 1      Dog Food
Store 1    Cat Litter
Store 3     Bird Seed
Name: Item Purchased, dtype: object

What if we want some particular data only?
Let's suppose we want to get all the costs of the Store 1 only

In [46]:
# .loc[row_label, column_label] selects in a single step (no chaining).
# 'Store 1' labels two rows, so the result is a Series with both costs.
df.loc['Store 1', 'Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

If we want several pieces of data related to a store,
for example all the items purchased from Store 1 together with their cost:

In [47]:
# Pass a list of column labels to get several columns for the selected rows.
df.loc['Store 1', ['Item Purchased', 'Cost']]

Unnamed: 0,Item Purchased,Cost
Store 1,Dog Food,22.5
Store 1,Cat Litter,2.5


In [48]:
# Another way NOT to get a column: transpose the frame and select the column
# as if it were a row.

# NOT RECOMMENDED -- note that the result comes back with dtype object
# instead of the column's own float64.

df.T.loc['Cost']

Store 1    22.5
Store 1     2.5
Store 3       5
Name: Cost, dtype: object

In [49]:
# Items purchased and their costs for every store: `:` selects all rows,
# the list selects the columns.
df.loc[:, ['Item Purchased', 'Cost']]

Unnamed: 0,Item Purchased,Cost
Store 1,Dog Food,22.5
Store 1,Cat Litter,2.5
Store 3,Bird Seed,5.0


In [50]:
df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Cat Litter,2.5
Store 3,Vinod,Bird Seed,5.0


### Dropping data

In [51]:
# 1. drop() removes rows (or columns) by label.
# 2. Its first argument is the label to remove.
# 3. By default it returns a new frame and does not alter the original.

df.drop('Store 3')

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Cat Litter,2.5


In [52]:
# The original frame is unchanged: 'Store 3' is still present.
df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Cat Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [53]:
# Work on a copy so the in-place examples that follow leave `df` intact.
df_copy = df.copy()


In [54]:
# 4. drop() has two optional parameters used here:
#     a) inplace = False (default) / True (modify the frame itself instead of returning a new one)
#     b) axis = 0 (default: drop a row) / 1 (drop a column)
# With inplace=True nothing is returned; every row labelled 'Store 1' is removed from df_copy.
df_copy.drop('Store 1', inplace=True)

In [55]:
df_copy

Unnamed: 0,Name,Item Purchased,Cost
Store 3,Vinod,Bird Seed,5.0


In [56]:
# Start again from a fresh copy, then drop the 'Cost' column (axis=1) in place.
df_copy = df.copy()
df_copy.drop('Cost', inplace=True, axis=1)

In [57]:
df_copy

Unnamed: 0,Name,Item Purchased
Store 1,Chris,Dog Food
Store 1,Kevyn,Cat Litter
Store 3,Vinod,Bird Seed


In [58]:
# 1. Another way to delete a column is the `del` statement combined with the indexing operator.
# 2. It modifies the DataFrame directly (nothing is returned).
# 3. It works for columns only.
del df_copy['Item Purchased']

In [59]:
df_copy

Unnamed: 0,Name
Store 1,Chris
Store 1,Kevyn
Store 3,Vinod


### Adding a new column 

In [60]:
# Add a new column 'Location'; the single value is broadcast to every row.
df['Location'] = 'NYC'

In [61]:
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Chris,Dog Food,22.5,NYC
Store 1,Kevyn,Cat Litter,2.5,NYC
Store 3,Vinod,Bird Seed,5.0,NYC


In [62]:
# Assigning to a row label that does not exist yet appends a new row.
# The Series keys are matched against the column names.
df.loc['Store 2'] = pd.Series({'Name': 'Mohit', 'Item Purchased': 'Snacks', 'Cost': 10.0, 'Location': 'India'})

In [63]:
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Chris,Dog Food,22.5,NYC
Store 1,Kevyn,Cat Litter,2.5,NYC
Store 3,Vinod,Bird Seed,5.0,NYC
Store 2,Mohit,Snacks,10.0,India


In [64]:
# One Series per person, all sharing the labels fname / lname / age.
test_1, test_2, test_3 = (
    pd.Series(dict(fname=first, lname='soni', age=years))
    for first, years in (('mohit', 30), ('udit', 27), ('nitin', 29))
)

# Each Series becomes a row, labelled by city.
bro_df = pd.DataFrame(
    [test_1, test_2, test_3],
    index=['Jaipur', 'Bikaner', 'Banswara'],
)
bro_df

Unnamed: 0,fname,lname,age
Jaipur,mohit,soni,30
Bikaner,udit,soni,27
Banswara,nitin,soni,29


In [65]:
# Broadcast an operation over a whole column of the DataFrame.

# One year later: increase every brother's age by 1 in a single operation.
# Note: this modifies bro_df, so re-running the cell adds another year each time.
bro_df['age'] += 1

In [66]:
bro_df

Unnamed: 0,fname,lname,age
Jaipur,mohit,soni,31
Bikaner,udit,soni,28
Banswara,nitin,soni,30


# Read data from CSV

In [67]:
# Read the CSV with default settings: the first line supplies the column names
# and a default 0..n-1 index is generated.
wine_df = pd.read_csv('wine_data.csv')
wine_df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [68]:
# index_col picks the column to use as the index; a negative number counts from
# the last column, so -3 selects 'title' here.
wine_df = pd.read_csv('wine_data.csv', index_col=-3)
wine_df.head()

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0.1,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,variety,winery
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Nicosia 2013 Vulkà Bianco (Etna),0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,White Blend,Nicosia
Quinta dos Avidagos 2011 Avidagos Red (Douro),1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Portuguese Red,Quinta dos Avidagos
Rainstorm 2013 Pinot Gris (Willamette Valley),2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Pinot Gris,Rainstorm
St. Julian 2013 Reserve Late Harvest Riesling (Lake Michigan Shore),3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,Riesling,St. Julian
Sweet Cheeks 2012 Vintner's Reserve Wild Child Block Pinot Noir (Willamette Valley),4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Pinot Noir,Sweet Cheeks


In [69]:
# skiprows=3 discards the first three lines of the file -- including the real
# header line -- so the next data line ends up being used as the column names.
wine_df = pd.read_csv('wine_data.csv', skiprows=3)
wine_df.head()

Unnamed: 0,2,2.1,US,"Tart and snappy, the flavors of lime flesh and rind dominate. Some green pineapple pokes through, with crisp acidity underscoring the flavors. The wine was all stainless-steel fermented.",Unnamed: 4,87,14.0,Oregon,Willamette Valley,Willamette Valley.1,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
0,3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
1,4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
2,5,5,Spain,Blackberry and raspberry aromas show a typical...,Ars In Vitro,87,15.0,Northern Spain,Navarra,,Michael Schachner,@wineschach,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot,Tandem
3,6,6,Italy,"Here's a bright, informal red that opens with ...",Belsito,87,16.0,Sicily & Sardinia,Vittoria,,Kerin O’Keefe,@kerinokeefe,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato,Terre di Giurfo
4,7,7,France,This dry and restrained wine offers spice in p...,,87,24.0,Alsace,Alsace,,Roger Voss,@vossroger,Trimbach 2012 Gewurztraminer (Alsace),Gewürztraminer,Trimbach


In [70]:
# header=2 takes the column names from line number 2 (0-based); the lines
# before it are dropped, so a data row becomes the header here as well.
wine_df = pd.read_csv('wine_data.csv', header=2)
wine_df.head()

Unnamed: 0,1,1.1,Portugal,"This is ripe and fruity, a wine that is smooth while still structured. Firm tannins are filled out with juicy red berry fruits and freshened with acidity. It's already drinkable, although it will certainly be better from 2016.",Avidagos,87,15.0,Douro,Unnamed: 8,Unnamed: 9,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
0,2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
1,3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
2,4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
3,5,5,Spain,Blackberry and raspberry aromas show a typical...,Ars In Vitro,87,15.0,Northern Spain,Navarra,,Michael Schachner,@wineschach,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot,Tandem
4,6,6,Italy,"Here's a bright, informal red that opens with ...",Belsito,87,16.0,Sicily & Sardinia,Vittoria,,Kerin O’Keefe,@kerinokeefe,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato,Terre di Giurfo


In [71]:
# Use the column at position 2 ('country') as the index and discard the two
# leftover 'Unnamed' columns in one step.
wine_df = (
    pd.read_csv('wine_data.csv', index_col=2)
    .drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
)
wine_df.head()

Unnamed: 0_level_0,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [72]:
# Rename columns with one mapping instead of looping over every column and
# calling rename(..., inplace=True) mid-iteration. Labels in the mapping that
# are not present are ignored, so re-running this cell is harmless.
wine_df = wine_df.rename(columns={
    'description': 'Vivran',
    'variety': 'variety aka PRAJATI',
})

wine_df.head()

Unnamed: 0_level_0,Vivran,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety aka PRAJATI,winery
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [73]:
# Pipe-delimited file: skip its first line and supply our own column names.
# A column name may itself contain the separator character, e.g. "(a|1)".
animal = pd.read_csv('animal_data.csv', sep='|', skiprows=1, names=["number", "colour", "(a|1)", "animal"])
animal

Unnamed: 0,number,colour,(a|1),animal
0,1,green,x,dog
1,2,blue,y,cat
2,3,red,z,owl


In [74]:
# Without `names`, skipping the first two lines makes the third line of the
# file serve as the header, which is why a data row shows up as column names.
animal = pd.read_csv('animal_data.csv', sep='|', skiprows=2)
animal

Unnamed: 0,2,blue,y,cat
0,3,red,z,owl


## Querying a DataFrame

1. Querying a DF is optimised when we use ***Boolean Masking***
2. BM is the heart of fast and efficient querying in NumPy
3. A BM is either a 1-D or a 2-D array of **True/False** values, for a Series or a DataFrame respectively
4. This array is **overlaid** on top of the data structure that we are querying
5. Only the cells aligned with a **True** value are carried over into the result

In [102]:
# header=1 takes the column names from the second line of the file;
# index_col=0 uses the first column (the country) as the index.
olympics_df = pd.read_csv('olympics.csv', header=1, index_col=0)
# Display the first rows as the cell's result (rich table) instead of printing
# the column index. The columns already arrive named Gold / Silver / Bronze,
# so no renaming step is needed.
olympics_df.head()

Unnamed: 0,№ Summer,Gold,Silver,Bronze,Total,№ Winter,Gold.1,Silver.1,Bronze.1,Total.1,№ Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12
