# Data Structures in Pandas

In [2]:
import pandas as pd
import numpy as np

## Series

A Series is a 1D labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.). The axis labels are collectively referred to as the index.

In [3]:
# Build a Series from a plain Python list. No index is passed, so pandas
# assigns the default integer labels 0..4 (visible in the output below).
data = [1, 2, 3, 5, 5]
s = pd.Series(data=data)

print(s)

0    1
1    2
2    3
3    5
4    5
dtype: int64


In [4]:
# From an ndarray: five random draws, labelled "a" through "e".
# NOTE(review): no random seed is set in the visible cells, so the printed
# values will differ on every run.
s = pd.Series(
    np.random.randn(5),
    index=list("abcde"),
)
print(s)

a    1.622861
b    1.157841
c   -0.893660
d   -1.694600
e   -0.989152
dtype: float64


In [5]:
# From a dict: keys become the index labels, values become the data.
# The insertion order of the dict (b, a, c) is kept, as the output shows.
d = dict(b=1, a=3, c=4)

print(pd.Series(data=d))

b    1
a    3
c    4
dtype: int64


In [6]:
# From a scalar value: the single value is repeated for every label in the index.
print(pd.Series(69, index=list("abcd")))

a    69
b    69
c    69
d    69
dtype: int64


A Series acts very similarly to an ndarray and is a valid argument to most NumPy functions.

In [7]:
# Re-create the sample Series, this time with explicit string labels.
data = [1, 2, 3, 5, 5]
s = pd.Series(data, index=list("abcde"))

print(s)
print(s.iloc[:3])             # positional slice: the first three entries
print(s.loc[s > s.median()])  # boolean mask: entries strictly above the median
print(np.exp(s))              # NumPy ufunc applied element-wise; labels are kept

a    1
b    2
c    3
d    5
e    5
dtype: int64
a    1
b    2
c    3
dtype: int64
d    5
e    5
dtype: int64
a      2.718282
b      7.389056
c     20.085537
d    148.413159
e    148.413159
dtype: float64


In [8]:
# Extract the underlying values of the Series as a NumPy array (labels are dropped).
# A dedicated name is used instead of `d`, which holds a dict elsewhere in this
# notebook; reusing it for an ndarray makes out-of-order runs confusing.
s_values = s.to_numpy()
print(s_values)

[1 2 3 5 5]


In [9]:
# A Series can carry a name, which is shown alongside the dtype when printed.
rankings = pd.Series(
    data=[95, 92, 91],
    index=[1, 2, 3],
    name="rankings",
)
print(rankings)

1    95
2    92
3    91
Name: rankings, dtype: int64


## DataFrame

A DataFrame is a 2D labeled data structure with columns of potentially different types.

DataFrame accepts many kinds of input:
- Dict of 1D ndarrays, lists, dicts, or Series
- 2D numpy.ndarray
- Structured or record ndarray
- A Series
- Another DataFrame

In [101]:
# Build a DataFrame from a dict of Series: each dict key becomes a column and
# the index shared by the Series ("Goals", "Assists") becomes the row labels.
# pandas is already imported as `pd` in the notebook's first cell, so it is
# not imported again here.
d = {
    "player1": pd.Series([22, 13], index=["Goals", "Assists"]),
    "player2": pd.Series([10, 7], index=["Goals", "Assists"]),
}

df = pd.DataFrame(d)
print(df)

         player1  player2
Goals         22       10
Assists       13        7


In [102]:
# Look up a single cell with one explicit (row label, column label) .loc call.
# This avoids attribute-style column access, which breaks for column names that
# clash with DataFrame attributes, and avoids chaining two separate lookups.
df.loc["Goals", "player1"]

22

In [103]:
from collections import namedtuple

# Each namedtuple instance supplies one row; the field names become the columns.
Players = namedtuple("Players", ["Salah", "DeBruyne"])

df = pd.DataFrame(
    [
        Players(Salah=22, DeBruyne=12),  # row labelled "Goals"
        Players(Salah=12, DeBruyne=7),   # row labelled "Assists"
    ],
    index=["Goals", "Assists"],
)

print(df)

         Salah  DeBruyne
Goals       22        12
Assists     12         7


In [104]:
# Select one column as a Series: all rows, column "Salah".
df.loc[:, "Salah"]

Goals      22
Assists    12
Name: Salah, dtype: int64

In [105]:
# Remove the "DeBruyne" column. drop() returns a new frame rather than mutating
# in place, and errors="ignore" keeps the cell safe to re-run once the column
# is already gone (`del df["DeBruyne"]` raises KeyError on a second run).
df = df.drop(columns="DeBruyne", errors="ignore")
df

Unnamed: 0,Salah
Goals,22
Assists,12


In [106]:
# Add a "Son" column; the list is matched to the rows by position
# (Goals, Assists). A new column is appended after the existing ones.
df = df.assign(Son=[22, 3])
df

Unnamed: 0,Salah,Son
Goals,22,22
Assists,12,3


In [107]:
# Insert a "Mane" column at position 2. The NaN entry forces the column to
# float64, which is why 12 is displayed as 12.0.
# insert() mutates the frame and raises ValueError if the column already
# exists, so guard it to keep the cell safe to re-run.
if "Mane" not in df.columns:
    df.insert(2, "Mane", [12, np.nan])
df

Unnamed: 0,Salah,Son,Mane
Goals,22,22,12.0
Assists,12,3,


We can also convert a DataFrame into a dictionary that maps each column name to its Series:

In [108]:
# Convert to a plain dict of {column name: Series}, one entry per column.
df.to_dict(orient="series")

{'Salah': Goals      22
 Assists    12
 Name: Salah, dtype: int64,
 'Son': Goals      22
 Assists     3
 Name: Son, dtype: int64,
 'Mane': Goals      12.0
 Assists     NaN
 Name: Mane, dtype: float64}

In [109]:
# Goals plus assists per game for Salah.
# NOTE(review): 38 is presumably the number of matches in the season - confirm.
GAMES = 38

# Use a single .loc[row, column] lookup per value. The chained form
# df.loc["Goals"]["Salah"] first builds the whole "Goals" row (upcast to float
# because of the NaN-bearing "Mane" column) and only then picks the column.
salah_goals = df.loc["Goals", "Salah"]
salah_assists = df.loc["Assists", "Salah"]

contributionPerGameSalah = (salah_goals + salah_assists) / GAMES
print("Contribution per Game for Salah: ", contributionPerGameSalah)

Contribution per Game for Salah:  0.8947368421052632


In [110]:
# Transpose: players become the rows and the stats (Goals, Assists) the columns.
df.transpose()

Unnamed: 0,Goals,Assists
Salah,22.0,12.0
Son,22.0,3.0
Mane,12.0,


In [111]:
# Export the values as a 2D NumPy array (row and column labels are dropped).
# The frame mixes integer columns with a float column containing NaN, so the
# result is a single float array, as the output shows.
df.to_numpy(copy=False)

array([[22., 22., 12.],
       [12.,  3., nan]])