In [None]:
#!/usr/bin/python3
# (Kept as a plain comment: inside a notebook cell a shebang has no effect. The
# previous "!#..." spelling was an IPython shell escape that ran an empty command.)

from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

Pandas is one of the most widely used open-source libraries for importing and analyzing data in Python today. It provides a convenient way to import, view, split, apply, and combine array-like data. It is not only convenient, but efficient as well.

In [None]:
import pandas as pd
import numpy as np

# Report which pandas release the notebook is running against.
display("Pandas version: {}".format(pd.__version__))

## Pandas Data Structures -- Series & DataFrame

The core value of Pandas comes from the data structures it provides, primarily:
- Series (labeled, homogeneously-typed, one-dimensional arrays)
- DataFrames (labeled, potentially heterogeneously-typed, two-dimensional arrays)

## Pandas Series

### Create Series 

In [None]:
# An empty Series; the dtype is given explicitly because there is no data to infer it from

s = pd.Series(data=None, dtype=float)

In [None]:

# Build a Series from a dictionary: keys become the index labels, values the data

d = dict(a=1, b=2, c=3)
s = pd.Series(data=d)

In [None]:
# Build a float-typed Series from a NumPy integer array

a = np.arange(1, 5)
s = pd.Series(data=a, dtype=float, copy=False)

In [None]:
# Build a Series from a NumPy array, supplying our own index labels (10..13)

data = np.array(list('abcd'))
s = pd.Series(data, index=list(range(10, 14)))

In [None]:
# Build a Series from a scalar: the value is repeated once per index label

s = pd.Series(data=5, index=list(range(4)))

### Select Series data

In [None]:
s = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])

# Retrieve the first element by position.
# .iloc is used because plain s[0] on a string-labelled Series relies on a
# deprecated positional fallback.
s.iloc[0]

# Retrieve the first 3 elements
s.iloc[:3]

# Retrieve the last 3 elements
# (note: s[:-3] would return everything *except* the last 3)
s.iloc[-3:]

# Retrieve data via index label
s['a']

# Retrieve multiple elements via index labels
s[['a', 'c', 'd']]

### Series Functions

In [None]:
# Return the Series data as a NumPy array
# (to_numpy() is the accessor the pandas documentation recommends over .values)

s.to_numpy()

In [None]:
# Shape (a tuple of dimensions) and size (number of elements) of the Series.
# Both are attributes, not methods, so they are read without parentheses.

s.shape
s.size

In [None]:
# None of the calls below is assigned back, so `s` keeps its current value;
# each result is only displayed.

# Cast the Series to another data type
s.astype('int32')

# Count non-null values in the Series
s.count()

# Cumulative sum
s.cumsum()

# Drop missing values
s.dropna()

## Pandas DataFrame

A DataFrame is a two-dimensional structure, where data is aligned in a tabular fashion in rows and columns. 

The columns of a DataFrame are potentially heterogeneously typed, and its size is mutable. The axes are labeled, which allows arithmetic operations to be performed on the rows and columns.

### Create DataFrame

In [None]:
# A DataFrame with no rows and no columns

df = pd.DataFrame(data=None)

In [None]:
# DataFrame from a flat list: every element becomes one row of a single column

data = list(range(1, 6))
df = pd.DataFrame(data)

# DataFrame from a list of [name, rank] rows, with explicit column names
data = [
    [continent, rank]
    for rank, continent in enumerate(['Asia', 'Africa', 'Europe', 'Australia'], start=1)
]
df = pd.DataFrame(data, columns=['Continent', 'Rank'])

In [None]:
# DataFrame from a dict of equal-length lists: keys are the column names,
# and the row labels are supplied through `index`

data = dict(
    Continent=['Asia', 'Africa', 'Europe', 'Australia'],
    Rank=[1, 2, 3, 4],
)
df = pd.DataFrame(data, index=[f'rank_{n}' for n in range(1, 5)])

In [None]:
# DataFrame from a list of dicts: each dict supplies one row, keyed by column name

data = [dict(a=1, b=2), dict(a=12, b=13, c=14)]
pd.DataFrame(data=data, index=['first', 'second'])

In [None]:
# DataFrame from a dict of Series: each Series becomes a column and the rows
# are aligned on the Series' index labels

data = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
}

df = pd.DataFrame(data)

In [None]:
# DataFrame shape (rows, columns) and size (total number of cells).
# Both are attributes, so they are read without parentheses.

df.shape
df.size

### Select, Add and Delete DataFrame data

In [None]:
# Sample frame for the select / add / delete examples: two columns, rows labelled rank_1..rank_4
data = dict(
    Continent=['Asia', 'Africa', 'Europe', 'Australia'],
    Rank=[1, 2, 3, 4],
)
df = pd.DataFrame(data, index=[f'rank_{n}' for n in range(1, 5)])

In [None]:
# Column selection: by name, by integer position, and by label through .loc
df['Continent']
df.iloc[:, 0]
df.loc[:, 'Continent']

# Row selection: one row by label, one row by integer position,
# and the first three rows by positional slice
df.loc['rank_1']
df.iloc[0]
df.iloc[:3]

In [None]:
# Add column(s): the list must have one entry per existing row
population = ['4.5B', '1.2B', '746M', '26M']
df['population'] = population

# Add row(s)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to
# stack frames. ignore_index=True renumbers the rows 0..n-1, so the 'rank_N'
# labels (including df2's 'rank_5') are replaced by integer positions.
df2 = pd.DataFrame({'Continent': 'Antarctica', 'Rank': 5, 'population': '4K'}, index=['rank_5'])
df = pd.concat([df, df2], ignore_index=True)
df


In [None]:
# Remove a column from df in place

del df['population']

# Remove a row by index label; the result is only displayed here,
# it is not assigned back to df

df.drop(index=4)