## Introduction to NumPy

In [2]:
#Installation and setup
#pip install numpy

### Creating NumPy arrays

In [3]:
# Creating NumPy arrays
import numpy as np

# Fixed seed so the random example at the end of this cell produces the
# same values on every Restart & Run All.
SEED = 42

# Create an array from a list
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1)

# Create an array of zeros (the elements are floats)
arr2 = np.zeros(5)
print(arr2)

# Create a 3x3 array of ones
arr3 = np.ones((3, 3))
print(arr3)

# Create an array of evenly spaced values:
# starts at 0, stops before 10, in steps of 2
arr4 = np.arange(0, 10, 2)
print(arr4)

# Create a 3x3 array of random values drawn uniformly from [0, 1),
# using a seeded generator instead of the unseeded global state
rng = np.random.default_rng(SEED)
arr5 = rng.random((3, 3))
print(arr5)

[1 2 3 4 5]
[0. 0. 0. 0. 0.]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[0 2 4 6 8]
[[0.58328518 0.12901761 0.03907685]
 [0.83383585 0.90688375 0.26726554]
 [0.30669582 0.33124307 0.08063788]]


In [4]:
# Array attributes: shape, size, dtype

# Print, in order: the dimensions of arr1, its total number of elements,
# and the data type of its elements.
for attribute in ("shape", "size", "dtype"):
    print(getattr(arr1, attribute))

(5,)
5
int32


In [5]:
# Indexing and slicing arrays

# Single element: index 0 is the first value
first_value = arr1[0]
print(first_value)

# Slice: indices 1, 2 and 3 (the stop index 4 is excluded)
middle_values = arr1[1:4]
print(middle_values)


1
[2 3 4]


In [6]:
# Array operations: arithmetic, aggregation, broadcasting

# Arithmetic: element-wise sum of two arrays of the same length
arr6 = np.add(arr1, arr2)
print(arr6)

# Aggregation: total of all elements of arr1
print(arr1.sum())

# Broadcasting: the scalar 2 is applied to every element of arr1
arr7 = np.multiply(arr1, 2)
print(arr7)


[1. 2. 3. 4. 5.]
15
[ 2  4  6  8 10]


### Array Manipulation

In [7]:
# Reshaping arrays

# Take the integers 0..8 (9 is excluded) and lay them out as a 3x3 grid
flat_values = np.arange(9)
arr8 = np.reshape(flat_values, (3, 3))
print(arr8)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [8]:
# Stacking and splitting arrays

# Stack two copies of arr8 on top of each other (along the rows)
arr9 = np.concatenate((arr8, arr8), axis=0)
print(arr9)

# Split the stacked array into two equal parts along the rows
arr10, arr11 = np.split(arr9, 2, axis=0)
print(arr10, arr11)

[[0 1 2]
 [3 4 5]
 [6 7 8]
 [0 1 2]
 [3 4 5]
 [6 7 8]]
[[0 1 2]
 [3 4 5]
 [6 7 8]] [[0 1 2]
 [3 4 5]
 [6 7 8]]


In [9]:
# Transposing arrays

# Swap rows and columns of arr8
arr12 = np.transpose(arr8)
print(arr12)

[[0 3 6]
 [1 4 7]
 [2 5 8]]


In [10]:
# Universal functions (ufuncs)

# A ufunc is applied element by element: here the sine of every value in arr1
arr13 = np.sin(arr1)
print(arr13)


[ 0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427]


### Advanced NumPy

In [11]:
# Fancy indexing
# List the positions to pull out of arr1, then select them all in one step
indices = np.array([0, 2, 4])
print(arr1.take(indices))


[1 3 5]


In [12]:
# Boolean indexing
# Build a True/False mask (True where the element is greater than 3),
# then keep only the elements where the mask is True
bool_arr = np.greater(arr1, 3)
print(arr1[bool_arr])

[4 5]


In [13]:
# Vectorized operations
# Add 10 to every element of arr1 without writing a Python loop
arr14 = np.add(arr1, 10)
print(arr14)


[11 12 13 14 15]


In [14]:
# Broadcasting
# A 5x1 column added to the 1-D arr1 produces a 2-D table: one row per
# column entry, each row being arr1 shifted by that entry
column = np.array([10, 20, 30, 40, 50])[:, np.newaxis]
arr15 = arr1 + column
print(arr15)

[[11 12 13 14 15]
 [21 22 23 24 25]
 [31 32 33 34 35]
 [41 42 43 44 45]
 [51 52 53 54 55]]


## Introduction to Pandas

In [15]:
#Installation and setup
#pip install pandas 

### Working with Series

In [16]:
# Creating Series objects

import pandas as pd
import numpy as np

# Series built from a plain Python list (a default 0..n-1 index is assigned)
values = [1, 2, 3, 4, 5]
s1 = pd.Series(values)
print(s1)

# Series built from a NumPy array holding the same values
s2 = pd.Series(np.array(values))
print(s2)

# Series built from a dictionary: the keys become the index labels
s3 = pd.Series(dict(a=1, b=2, c=3))
print(s3)

0    1
1    2
2    3
3    4
4    5
dtype: int64
0    1
1    2
2    3
3    4
4    5
dtype: int32
a    1
b    2
c    3
dtype: int64


In [18]:
# Indexing and slicing Series

# Accessing elements by label
print(s3['a'])

# Accessing elements by position.
# Use .iloc: s3 is labelled with strings, and plain s3[0] relies on the
# deprecated behaviour of treating an integer key as a position.
print(s3.iloc[0])

# Slicing by position: the first two elements
print(s3.iloc[:2])

1
1
a    1
b    2
dtype: int64


  print(s3[1])


In [21]:
# Operations on Series

# Arithmetic between two Series: values are added element by element
s4 = s1.add(s2)
print(s4)

# Element-wise operation with a scalar: every value is doubled
s5 = s1.mul(2)
print(s5)

# Aggregation: sum of all values in s1
total = s1.sum()
print(total)

0     2
1     4
2     6
3     8
4    10
dtype: int64
0     2
1     4
2     6
3     8
4    10
dtype: int64
15


In [24]:
# Handling missing data

# Copy of s1 with missing entries removed
s6 = s1.dropna()

# Copy of s1 with missing entries replaced by 0
s7 = s1.fillna(value=0)

# Boolean Series: True wherever s1 has a missing value
print(s1.isna())


0    False
1    False
2    False
3    False
4    False
dtype: bool


### Working with DataFrames

In [27]:
# Creating DataFrames

# Column-oriented input: each dictionary key becomes a column
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
)
df1 = pd.DataFrame(data)
print(df1)

# Row-oriented input: one inner list per row, column names passed separately
data = [
    ['Alice', 25],
    ['Bob', 30],
    ['Charlie', 35],
]
df2 = pd.DataFrame(data, columns=['Name', 'Age'])
print(df2)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [28]:
# Indexing and slicing DataFrames

# Label-based indexing: row labelled 0, column labelled 'Name'
name_by_label = df1.loc[0, 'Name']
print(name_by_label)

# Position-based indexing: first row, first column
name_by_position = df1.iloc[0, 0]
print(name_by_position)

# Slicing: the first two rows
print(df1.iloc[:2])

Alice
Alice
    Name  Age
0  Alice   25
1    Bob   30


In [29]:
# Basic operations

# Sorting: rows ordered by ascending 'Age' (returns a new DataFrame)
df1_sorted = df1.sort_values('Age')
print(df1_sorted)

# Filtering: keep only the rows whose 'Age' is greater than 30
older_than_30 = df1['Age'] > 30
df1_filtered = df1.loc[older_than_30]
print(df1_filtered)

# Selecting a single column gives a Series
names = df1.loc[:, 'Name']
print(names)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
      Name  Age
2  Charlie   35
0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object


In [36]:
# Data manipulation

# Adding a column: one value per existing row
df1['Gender'] = ['Female', 'Male', 'Male']

# Deleting a column: after this line df1 no longer has 'Gender'
df1.pop('Gender')

# Updating a column: every age is increased by one.
# Note: this modifies df1 itself, so re-running the cell adds one again.
df1['Age'] = df1['Age'].add(1)

### Data Cleaning and Preparation

In [37]:
# Handling missing values

# Copy of df1 without the rows that contain a missing value
df1_cleaned = df1.dropna(axis=0)

# Copy of df1 with every missing value replaced by 0
df1_filled = df1.fillna(value=0)

# Per column: True if that column has at least one missing value
print(df1.isna().any(axis=0))

Name    False
Age     False
dtype: bool


In [41]:
# Data normalization (numerical values only)

# Keep only the numeric columns: the arithmetic below is not defined for
# string columns such as 'Name' and raises a TypeError if they are included.
df1_numeric = df1.select_dtypes(include='number')

# Min-Max normalization: rescale each column so its minimum maps to 0
# and its maximum maps to 1
col_min = df1_numeric.min()
col_max = df1_numeric.max()
df1_normalized = (df1_numeric - col_min) / (col_max - col_min)

# Standardization: subtract each column's mean and divide by its
# standard deviation
df1_standardized = (df1_numeric - df1_numeric.mean()) / df1_numeric.std()

In [40]:
# Data transformation

# Log transformation: natural logarithm of each age, stored as a new column
df1['Log_Age'] = np.log(df1['Age'])

# Encoding categorical variables
# df1 has no 'Gender' column at this point (it was added and then deleted
# in the data-manipulation cell), so asking get_dummies for it raises a
# KeyError. Build a copy that carries the column and encode that copy;
# df1 itself is left without 'Gender'.
df1_with_gender = df1.assign(Gender=['Female', 'Male', 'Male'])
df1_encoded = pd.get_dummies(df1_with_gender, columns=['Gender'])

In [42]:
# Data aggregation and grouping

# Put rows that share the same 'Age' value into one group
age_groups = df1.groupby(by='Age')

# Average of the 'Age' column inside each group
mean_age = age_groups['Age'].agg('mean')

### Advanced Pandas

In [47]:
## Merging, Joining, and Concatenating DataFrames

# Concatenating DataFrames
import pandas as pd

# Two sample DataFrames with the same columns ('A' and 'B')
df1 = pd.DataFrame(dict(A=['A0', 'A1', 'A2'], B=['B0', 'B1', 'B2']))
df2 = pd.DataFrame(dict(A=['A3', 'A4', 'A5'], B=['B3', 'B4', 'B5']))

frames = [df1, df2]

# Concatenate along rows: df2's rows are placed under df1's rows
# (each frame keeps its own index labels)
result_row = pd.concat(frames, axis=0)

# Concatenate along columns: df2's columns are placed next to df1's
result_col = pd.concat(frames, axis=1)

In [48]:
# Merging DataFrames

# Two sample DataFrames that share a 'key' column; K1 and K2 appear in both
left = pd.DataFrame(dict(key=['K0', 'K1', 'K2'], value=['V0', 'V1', 'V2']))
right = pd.DataFrame(dict(key=['K1', 'K2', 'K3'], value=['V1', 'V2', 'V3']))

# Inner join: only keys present in both frames
inner_join = left.merge(right, how='inner', on='key')

# Left join: every key of `left`
left_join = left.merge(right, how='left', on='key')

# Right join: every key of `right`
right_join = left.merge(right, how='right', on='key')

# Outer join: every key found in either frame
outer_join = left.merge(right, how='outer', on='key')

In [52]:
# Joining DataFrames

# Two sample DataFrames labelled by letters; only 'a' and 'b' are in both
left = pd.DataFrame(dict(value1=[1, 2, 3]), index=['a', 'b', 'c'])
right = pd.DataFrame(dict(value2=[4, 5, 6]), index=['a', 'b', 'd'])

# Join on the index, keeping only labels found in both frames
join_df = left.join(other=right, how='inner')
