In [8]:
import pandas as pd
import numpy as np

In [65]:
np.random.seed(42)

number_records = 150

data = {
    "age": np.random.randint(18, 50, size=number_records),
    "income": np.random.randint(20000, 50000, size=number_records),
}

df = pd.DataFrame(data)

nan_indices_income = np.random.choice(df.index, 50, replace=False)
nan_indices_score = np.random.choice(df.index, 30, replace=False)

df.loc[nan_indices_income, "income"] = np.nan

df

Unnamed: 0,age,income
0,24,33986.0
1,37,49090.0
2,46,32666.0
3,32,
4,28,
...,...,...
145,48,25791.0
146,28,46309.0
147,36,41919.0
148,34,


#### Problem 1: Compute (a) mean, (b) median, and (c) age-weighted mean of income. Ignore NaNs where appropriate. Explain when a weighted mean is preferable. 

In [67]:
# mean of income
mean_income = df['income'].mean()
print(f"Mean Income: {mean_income:,.2f}")

# median of incom
median_income = df['income'].median()
print(f"Median Income: {median_income:,.2f}")

# age-weighted mean of income
temp_df = df.dropna(subset=['income', 'age'])
age_weighted_mean_income = np.average(temp_df['income'], weights=temp_df['age'])
print(f"Age-Weighted Mean Income: {age_weighted_mean_income:,.2f}")

Mean Income: 37,284.79
Median Income: 38,676.00
Age-Weighted Mean Income: 37,411.74


#### Problem 2: Standardize income (z-score). Report how many incomes are outliers using rule |z| > 3. Handle NaNs correctly (do not drop entire rows unnecessarily). 

In [69]:
income_mean = df['income'].mean()
income_std = df['income'].std()

# z score
df['income_zscore'] = (df['income'] - income_mean) / income_std

# z>3 (outliers)
num_outliers = (df['income_zscore'].abs() > 3).sum()

print(f"Mean Income for Z-Score Calculation: {income_mean:.2f}")
print(f"Std Dev of Income for Z-Score Calculation: {income_std:.2f}\n")
print("First 5 rows with Z-scores:")
print(df[['income', 'income_zscore']].head())
print(f"\nNumber of income outliers (|z| > 3): {num_outliers}")

Mean Income for Z-Score Calculation: 37284.79
Std Dev of Income for Z-Score Calculation: 8310.64

First 5 rows with Z-scores:
    income  income_zscore
0  33986.0      -0.396936
1  49090.0       1.420494
2  32666.0      -0.555768
3      NaN            NaN
4      NaN            NaN

Number of income outliers (|z| > 3): 0


#### Problem 3: Create age bins: [18-25), [25-35), [35-45), [45-60) and compute for each bin: count of observations, mean income, median score. Show result as a tidy DataFrame sorted by age bin. 

In [75]:
bins = [18, 25, 35, 45, 60]

df['age_bin'] = pd.cut(df['age'], bins=bins, right=False)

age_bin_analysis = (
    df.groupby('age_bin', observed=True)
    .agg(count_of_observations=('age', 'count'), mean_income=('income', 'mean')))

age_bin_analysis

Unnamed: 0_level_0,count_of_observations,mean_income
age_bin,Unnamed: 1_level_1,Unnamed: 2_level_1
"[18, 25)",26,36239.0
"[25, 35)",47,37092.555556
"[35, 45)",47,38340.314286
"[45, 60)",30,36659.368421


#### Problem 4: Create an array it cannot be of 1 Dimension. And then showcase the operation for the following:  Shape and Resize → shape, size, Transpose, Flatten Showcasing negative indexing and display error while doing slicing Arithmetic Operations → Broadcasting, Dot Product Linear Algebra → Determinant, Inverse 

In [77]:
# 2D array
my_array = np.arange(12).reshape(3, 4)
print("Original Array:")
print(my_array)

Original Array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [79]:
# Shape and Resize
print("\n--- Shape and Resize Operations ---")
print(f"Shape of the array: {my_array.shape}")
print(f"Size (total elements) of the array: {my_array.size}")
print("Transposed Array (T):")
print(my_array.T)
print("Flattened Array (1D):")
print(my_array.flatten())


--- Shape and Resize Operations ---
Shape of the array: (3, 4)
Size (total elements) of the array: 12
Transposed Array (T):
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]
Flattened Array (1D):
[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [46]:
# Negative Indexing and Slicing Error
print("\n--- Indexing and Slicing ---")
# Negative indexing gets the last element from the last row
last_element = my_array[-1, -1]
print(f"Last element using negative indexing my_array[-1, -1]: {last_element}")


--- Indexing and Slicing ---
Last element using negative indexing my_array[-1, -1]: 11


In [48]:
# Displaying an error while slicing
try:
    error_slice = my_array[5, 5]
except IndexError as e:
    print(f"\nSuccessfully caught an expected error: {e}")


Successfully caught an expected error: index 5 is out of bounds for axis 0 with size 3


In [50]:
# Arithmetic Operations
print("\n--- Arithmetic Operations ---")
# Broadcasting: adding a scalar to the array
broadcasted_array = my_array + 100
print("Broadcasting (Array + 100):")
print(broadcasted_array)


--- Arithmetic Operations ---
Broadcasting (Array + 100):
[[100 101 102 103]
 [104 105 106 107]
 [108 109 110 111]]


In [52]:
# Dot Product
array_b = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
print("\nSecond array for dot product (shape 4x2):")
print(array_b)

dot_product = my_array @ array_b # Using the @ operator
print("Dot Product of a 3x4 and 4x2 array (result is 3x2):")
print(dot_product)


Second array for dot product (shape 4x2):
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
Dot Product of a 3x4 and 4x2 array (result is 3x2):
[[ 34  40]
 [ 98 120]
 [162 200]]


In [54]:
# Linear Algebra
print("\n--- Linear Algebra ---")
# Determinant and Inverse require a square matrix (NxN)
square_matrix = np.array([[4, 7], [2, 6]])
print("Square Matrix:")
print(square_matrix)


--- Linear Algebra ---
Square Matrix:
[[4 7]
 [2 6]]


In [56]:
# Calculate the determinant
determinant = np.linalg.det(square_matrix)
print(f"\nDeterminant of the square matrix: {determinant:.2f}")


Determinant of the square matrix: 10.00


In [58]:
# Calculate the inverse
if determinant != 0:
    inverse_matrix = np.linalg.inv(square_matrix)
    print("Inverse of the square matrix:")
    print(inverse_matrix)
else:
    print("Matrix does not have an inverse (determinant is zero).")

Inverse of the square matrix:
[[ 0.6 -0.7]
 [-0.2  0.4]]
