# Pandas Introduction

#### **Imports**

In [1]:
import pandas as pd
import numpy as np

### Pandas Series

In [2]:
# Raw values first, then wrap them in a Series; pandas assigns a default
# 0..n-1 integer index because none is supplied.
population_values = [
    35.467,
    63.951,
    80.940,
    60.665,
    127.061,
    64.511,
    318.523,
]
g7_pop = pd.Series(population_values)
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [3]:
# Give the Series a name; it is shown in the repr next to the dtype.
g7_pop.name = "G7 Population in Millions"
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in Millions, dtype: float64

We can change the index of the series

In [5]:
# Swap the default positional index for country labels
# (one label per value, in the same order as the data).
g7_countries = [
    "Canada", "France", "Germany", "Italy",
    "Japan", "United Kingdom", "United States",
]
g7_pop.index = g7_countries
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in Millions, dtype: float64

### **Indexing**
Indexing works similarly to lists and dictionaries: you use the index of the element you are looking for.

In [6]:
# Label-based lookup: fetch the value stored under the index label "Canada".
g7_pop["Canada"]

35.467

In [9]:
# Position-based lookup: .iloc[0] returns the first element, whatever its label is.
g7_pop.iloc[0]

35.467

In [10]:
# A list of labels selects several elements at once; the result is a Series
# whose entries follow the order of the requested labels.
g7_pop[["Italy", "France"]]

Italy     60.665
France    63.951
Name: G7 Population in Millions, dtype: float64

## DataFrame


In [18]:
# Column-oriented data: each key becomes a column, and every list holds one
# value per row, in the same row order across all columns.
g7_data = {
    'Population': [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
    'GDP': [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],
    'Surface Area': [9984670, 640679, 357114, 301336, 377930, 242495, 9525067],
    'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915],
    'Continent': [
        'America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America',
    ],
}
# `columns` pins the column order explicitly.
column_order = ['Population', 'GDP', 'Surface Area', 'HDI', 'Continent']
df = pd.DataFrame(g7_data, columns=column_order)


In [20]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text column 'Continent' is left out of the result.
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429
std,97.24997,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
25%,62.308,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


In [23]:
# Preview the first five rows instead of printing the whole frame.
df.head()

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
0,35.467,1785387,9984670,0.913,America,
1,63.951,2833687,640679,0.888,Europe,
2,80.94,3874437,357114,0.916,Europe,
3,60.665,2167744,301336,0.873,Europe,
4,127.061,4602367,377930,0.891,Asia,


In [24]:
# Label the rows with country names (one per row, in row order) in place of
# the default integer index.
country_labels = [
    'Canada', 'France', 'Germany', 'Italy',
    'Japan', 'United Kingdom', 'United States',
]
df.index = country_labels
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,
Germany,80.94,3874437,357114,0.916,Europe,
Italy,60.665,2167744,301336,0.873,Europe,
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


### Conditional Selection (Boolean arrays)

<code> df.loc[df['Population'] > 70] </code>

### **Dropping stuff**

- **Drop rows**
    - <code>df.drop('Canada') </code>
    - <code>df.drop(['Canada', 'Japan']) </code>
    - <code>df.drop(['Canada', 'Japan'], axis='index') </code>
- **Drop columns**
    - <code>df.drop(columns='Population') </code>
    - <code>df.drop(['Population', 'GDP'], axis=1) </code>
    - <code>df.drop(['Population', 'GDP'], axis='columns') </code>


### Modifying DataFrames

#### **Adding a new column**

As we know, a column is a pandas Series, so we can create a Series to represent our new column.

In [16]:
# Map each country label to its language. Built from a dict, the keys become
# the Series index and the values become the data.
language_by_country = {
    "France": "French",
    "Germany": "German",
    "Italy": "Italian",
}
lang = pd.Series(language_by_country, name="Language")

In [25]:
# Assigning a Series aligns on index labels: rows whose label is missing from
# `lang` (every country except France, Germany and Italy) are left without a value.
df["Language"] = lang
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.665,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


**Replacing values per column**

In [26]:
# Assigning a scalar broadcasts it to every row, overwriting the whole column.
df['Language'] = 'English'
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


**Renaming columns**

In [28]:
# old label -> new label; labels not listed in a mapping are kept as they are.
column_renames = {"HDI": "Human Development Index", "Continent": "Continent Name"}
row_renames = {"United Kingdom": "UK", "United States": "USA"}

# rename() returns a relabelled frame; the result is only displayed here,
# it is not assigned back to df.
df.rename(columns=column_renames, index=row_renames)

Unnamed: 0,Population,GDP,Surface Area,Human Development Index,Continent Name,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


In [29]:
# A callable works too: str.upper is applied to every index label.
# The result is only displayed, not assigned back to df.
df.rename(index=str.upper)

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
CANADA,35.467,1785387,9984670,0.913,America,English
FRANCE,63.951,2833687,640679,0.888,Europe,English
GERMANY,80.94,3874437,357114,0.916,Europe,English
ITALY,60.665,2167744,301336,0.873,Europe,English
JAPAN,127.061,4602367,377930,0.891,Asia,English
UNITED KINGDOM,64.511,2950039,242495,0.907,Europe,English
UNITED STATES,318.523,17348075,9525067,0.915,America,English


### Creating column from other columns

In [31]:
# Arithmetic between two columns is element-wise and row-aligned, producing a
# new Series that is then stored as an additional column.
gdp_per_population = df['GDP'] / df['Population']
df['GDP per population'] = gdp_per_population
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language,GDP per population
Canada,35.467,1785387,9984670,0.913,America,English,50339.385908
France,63.951,2833687,640679,0.888,Europe,English,44310.284437
Germany,80.94,3874437,357114,0.916,Europe,English,47868.013343
Italy,60.665,2167744,301336,0.873,Europe,English,35733.025633
Japan,127.061,4602367,377930,0.891,Asia,English,36221.712406
United Kingdom,64.511,2950039,242495,0.907,Europe,English,45729.239975
United States,318.523,17348075,9525067,0.915,America,English,54464.12033
