# Numpy and Pandas

### Numpy

A foundational library for numerical computing in Python, optimized for performance.

### Pandas

A high-level library for data manipulation and analysis.

In [2]:
# Importing Numpy

import numpy as np

# A flat list produces a 1-D array; a list of equal-length lists produces a 2-D array.
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

print(f"1 Dimensional array {arr1}")
print(f"2 Dimensional array {arr2}")

1 Dimensional array [1 2 3 4 5]
2 Dimensional array [[1 2 3]
 [4 5 6]]


In [4]:
# The integers 0..11, laid out row by row as 3 rows of 4 columns.
arr3 = np.arange(0, 12).reshape((3, 4))
arr3

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [7]:
# Array operations: arithmetic is applied element by element, no Python loop needed.

print(2 * arr1)     # every element multiplied by the scalar 2
print(arr1 + arr1)  # element-wise sum of two same-shaped arrays

[ 2  4  6  8 10]
[ 2  4  6  8 10]


In [8]:
# Matrix operations

mat1 = np.array([[1, 2], [3, 4]])
mat2 = np.array([[5, 6], [7, 8]])

# `@` performs true matrix multiplication (rows of mat1 against columns of mat2);
# `*` would multiply element by element instead. For 2-D arrays NumPy recommends
# `@` / np.matmul over np.dot; the result is the same.
print(mat1 @ mat2)

[[19 22]
 [43 50]]


In [11]:
# Statistical operations

# Seed the generator so the sample, and therefore the statistics printed below,
# is identical on every run of the notebook.
SEED = 42
rng = np.random.default_rng(SEED)

# 1000 draws from a normal distribution with mean 0 and standard deviation 1.
data = rng.normal(0, 1, 1000)
# round() without ndigits rounds to the nearest integer, so this only confirms
# that the sample statistics land close to the requested 0 and 1.
print(f'mean: {round(data.mean())}, std: {round(data.std())}')

mean: 0, std: 1


In [14]:
# Indexing and slicing

# mat1[1, 1] picks the single element at row 1, column 1;
# mat1[:, 1] takes column 1 from every row.
print(mat1[1, 1], mat1[:, 1], sep="\n")

4
[2 4]


In [15]:
# Importing pandas

import pandas as pd


In [16]:
# Column-oriented records: each keyword becomes a DataFrame column,
# and the lists supply that column's values row by row.
data = dict(
    Name=['alice', 'bob', 'charlie', 'david'],
    Age=[25, 30, 35, 40],
    Salary=[50000, 60000, 70000, 80000],
)

df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,Salary
0,alice,25,50000
1,bob,30,60000
2,charlie,35,70000
3,david,40,80000


In [18]:
# Basic operations: preview the first two rows, then print summary
# statistics (count, mean, std, min, quartiles, max) from describe().

print(df.head(2), df.describe(), sep="\n")

    Name  Age  Salary
0  alice   25   50000
1    bob   30   60000
             Age        Salary
count   4.000000      4.000000
mean   32.500000  65000.000000
std     6.454972  12909.944487
min    25.000000  50000.000000
25%    28.750000  57500.000000
50%    32.500000  65000.000000
75%    36.250000  72500.000000
max    40.000000  80000.000000


In [20]:
# Average of a single column, selected explicitly by label.
print(df.loc[:, 'Age'].mean())

32.5


In [21]:
# Handling missing values: np.nan marks the missing entries.
df_with_nan = pd.DataFrame(
    {
        'A': [1, 2, np.nan, 4],
        'B': [5, np.nan, np.nan, 8],
    }
)

# Print a copy with every missing entry replaced by 0; df_with_nan itself is not reassigned.
print(df_with_nan.fillna(0))

     A    B
0  1.0  5.0
1  2.0  0.0
2  0.0  0.0
3  4.0  8.0


In [22]:
# Drop every row that contains at least one missing value.
print(df_with_nan.dropna(axis=0, how='any'))

     A    B
0  1.0  5.0
3  4.0  8.0


In [24]:
# Grouping and Aggregation

df['Department'] = ['HR', 'Tech', 'Tech', 'HR']
# Average only the numeric columns. 'Name' holds strings, and pandas >= 2.0 raises
# TypeError when asked for the mean of a non-numeric column instead of silently
# dropping it, so the columns to aggregate are selected explicitly.
print(df.groupby('Department')[['Age', 'Salary']].mean())
df

             Age   Salary
Department               
HR          32.5  65000.0
Tech        32.5  65000.0


Unnamed: 0,Name,Age,Salary,Department
0,alice,25,50000,HR
1,bob,30,60000,Tech
2,charlie,35,70000,Tech
3,david,40,80000,HR


In [None]:
# One-row frame with a single 'Name' column.
df2 = pd.DataFrame(dict(Name=['Alice']))