# Hello Numpy, Hello Pandas

## Numpy

In Numpy, a list is not just a list, it is an "ndarray" (n-dimensional array).

In [1]:
# Python
l = [
     [1,2,3],
     [4,5,6],
     [7,8,9]
    ]
l

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [2]:
# Numpy
import numpy as np
# of course it can't be simple: the recommended constructor for an ndarray is actually "array"
a = np.array([
              [1,2,3],
              [4,5,6],
              [7,8,9]
             ])
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [3]:
# Python
l[2]

[7, 8, 9]

In [4]:
# Numpy
a[2]

array([7, 8, 9])

In [5]:
l[2][1]

8

In [6]:
a[2][1]

8

In [7]:
# Notice how python list indexing is different from numpy indexing
# It doesn't allow for the comma-based indexing
l[2,1]

TypeError: list indices must be integers or slices, not tuple

In [8]:
# Numpy does though
a[2,1]

8

In [10]:
# Again, notice how native python lists don't allow for grabbing a whole column in a 2D array
l[:,1]

TypeError: list indices must be integers or slices, not tuple

In [11]:
# Numpy easily allows us to grab whole columns
a[:,1]

array([2, 5, 8])

In [12]:
# Generating random ndarrays
# Fill with ints up to 10, 8 rows, 4 columns
X = np.random.randint(10, size=(8,4))
X

array([[8, 8, 6, 5],
       [5, 9, 1, 0],
       [6, 6, 2, 1],
       [7, 5, 2, 1],
       [3, 5, 1, 2],
       [6, 3, 0, 6],
       [8, 6, 1, 0],
       [5, 8, 9, 3]])

## Pandas

In Pandas, an ndarray is not just an ndarray, it is a dataframe.

In [13]:
import pandas as pd
df = pd.DataFrame(X, columns=['a','b','c','d'])
df

Unnamed: 0,a,b,c,d
0,8,8,6,5
1,5,9,1,0
2,6,6,2,1
3,7,5,2,1
4,3,5,1,2
5,6,3,0,6
6,8,6,1,0
7,5,8,9,3


In [14]:
# Numpy
X[2]

array([6, 6, 2, 1])

In [15]:
# Pandas
# don't do 'df[2]' !
df.iloc[2]

a    6
b    6
c    2
d    1
Name: 2, dtype: int32

In [16]:
X[2,0]

6

In [17]:
df.iloc[2,0]

6

In [18]:
X[:,0]

array([8, 5, 6, 7, 3, 6, 8, 5])

In [19]:
df.iloc[:,0]

0    8
1    5
2    6
3    7
4    3
5    6
6    8
7    5
Name: a, dtype: int32

In [22]:
# Format the dataframe for display purposes
df_styled = df.style
df_styled.set_caption("My example table")
df_styled.format("{:.2e}")
highlighter = lambda x: 'font-weight: bold' if x > 7 else 'font-weight: normal'
df_styled.applymap(highlighter)
display(df_styled)

Unnamed: 0,a,b,c,d
0,8.0,8.0,6.0,5.0
1,5.0,9.0,1.0,0.0
2,6.0,6.0,2.0,1.0
3,7.0,5.0,2.0,1.0
4,3.0,5.0,1.0,2.0
5,6.0,3.0,0.0,6.0
6,8.0,6.0,1.0,0.0
7,5.0,8.0,9.0,3.0


In [23]:
# Write the df to a csv file
df.to_csv("my_csv.csv")

# Read the csv file into a df
df2 = pd.read_csv("my_csv.csv")

In [26]:
# Create a latex table from the styled df
print(df_styled.to_latex())

\begin{table}
\caption{My example table}
\begin{tabular}{lrrrr}
 & a & b & c & d \\
0 & \font-weightbold 8.00e+00 & \font-weightbold 8.00e+00 & \font-weightnormal 6.00e+00 & \font-weightnormal 5.00e+00 \\
1 & \font-weightnormal 5.00e+00 & \font-weightbold 9.00e+00 & \font-weightnormal 1.00e+00 & \font-weightnormal 0.00e+00 \\
2 & \font-weightnormal 6.00e+00 & \font-weightnormal 6.00e+00 & \font-weightnormal 2.00e+00 & \font-weightnormal 1.00e+00 \\
3 & \font-weightnormal 7.00e+00 & \font-weightnormal 5.00e+00 & \font-weightnormal 2.00e+00 & \font-weightnormal 1.00e+00 \\
4 & \font-weightnormal 3.00e+00 & \font-weightnormal 5.00e+00 & \font-weightnormal 1.00e+00 & \font-weightnormal 2.00e+00 \\
5 & \font-weightnormal 6.00e+00 & \font-weightnormal 3.00e+00 & \font-weightnormal 0.00e+00 & \font-weightnormal 6.00e+00 \\
6 & \font-weightbold 8.00e+00 & \font-weightnormal 6.00e+00 & \font-weightnormal 1.00e+00 & \font-weightnormal 0.00e+00 \\
7 & \font-weightnormal 5.00e+00 & \font-weightbol