In [None]:
import numpy as np
# -------------------------------
# 1. Creating Different Types of Arrays
# -------------------------------

# 1D Array Creation
arr_1d = np.array([1, 2, 3, 4, 5])
print("1D Array using np.array:\n", arr_1d)

arr_arange = np.arange(0, 10, 2)  # Start, Stop, Step
print("1D Array using np.arange:\n", arr_arange)

arr_linspace = np.linspace(0, 1, 5)  # Start, Stop, Number of points
print("1D Array using np.linspace:\n", arr_linspace)

# 2D Array Creation
arr_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("\n2D Array using np.array:\n", arr_2d)

arr_reshaped = np.arange(1, 10).reshape(3, 3)
print("2D Array using np.reshape:\n", arr_reshaped)

# 3D Array Creation
arr_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print("\n3D Array using np.array:\n", arr_3d)

# -------------------------------
# 2. Basic Operations on Arrays
# -------------------------------

# Indexing and Slicing
print("\nElement at index 1 in 1D array:", arr_1d[1])
print("Slicing 2D Array:\n", arr_2d[:, 1:3])  # Extracts specific columns

# Reshaping, Resizing, and Flattening
reshaped_array = arr_1d.reshape(1, -1)  # Reshape 1D to 2D as a row vector
print("\nReshaped Array:\n", reshaped_array)

arr_resized = np.resize(arr_1d, (2, 3))  # Resize alters the array's shape in-place
print("Resized Array:\n", arr_resized)

flattened = arr_2d.flatten()  # Flatten a 2D array into a 1D array
print("Flattened Array:\n", flattened)

# Concatenation
concatenated = np.concatenate((arr_1d, arr_arange))  # Concatenating two 1D arrays
print("\nConcatenated Array:\n", concatenated)

arr1 = np.array([[1, 2], [3, 4]])
arr2 = np.array([[5, 6], [7, 8]])
concatenated_2d = np.concatenate((arr1, arr2), axis=0)  # Row-wise concatenation
print("Row-wise Concatenated 2D Array:\n", concatenated_2d)

# -------------------------------
# 3. Investigate Array Attributes
# -------------------------------

# Array Attributes
print("\nShape of 2D Array:", arr_2d.shape)
print("Size of 2D Array:", arr_2d.size)
print("Data Type of 2D Array:", arr_2d.dtype)
print("Number of Dimensions:", arr_2d.ndim)

1D Array using np.array:
 [1 2 3 4 5]
1D Array using np.arange:
 [0 2 4 6 8]
1D Array using np.linspace:
 [0.   0.25 0.5  0.75 1.  ]

2D Array using np.array:
 [[1 2 3]
 [4 5 6]]
2D Array using np.reshape:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]

3D Array using np.array:
 [[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]

Element at index 1 in 1D array: 2
Slicing 2D Array:
 [[2 3]
 [5 6]]

Reshaped Array:
 [[1 2 3 4 5]]
Resized Array:
 [[1 2 3]
 [4 5 1]]
Flattened Array:
 [1 2 3 4 5 6]

Concatenated Array:
 [1 2 3 4 5 0 2 4 6 8]
Row-wise Concatenated 2D Array:
 [[1 2]
 [3 4]
 [5 6]
 [7 8]]

Shape of 2D Array: (2, 3)
Size of 2D Array: 6
Data Type of 2D Array: int64
Number of Dimensions: 2


In [7]:
# Import necessary libraries
import numpy as np
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# -------------------------------
# 1. Load a Dataset Using NumPy
# -------------------------------
# Provide the correct path to the CSV file in Google Drive
file_path = '/content/drive/My Drive/data.csv'

# Load a CSV file. Adjust the path as needed.
data = np.genfromtxt(file_path, delimiter=',', skip_header=1)
print("Loaded Data:\n", data)

# -------------------------------
# 2. Clean and Preprocess the Data
# -------------------------------

# Handling Missing Values
# Replace NaN values in each column with the column mean, handling columns that might be entirely NaN
for i in range(data.shape[1]):
    # Suppress warnings when calculating the mean of potentially empty columns
    with np.errstate(invalid='ignore'):
        col_mean = np.nanmean(data[:, i])

    # Check if the column is completely NaN; if so, set a default value (like 0 or another meaningful number)
    if np.isnan(col_mean):
        col_mean = 0  # Replace with 0 or another default value

    # Replace NaN values in the column with the computed mean
    data[np.isnan(data[:, i]), i] = col_mean

print("\nData after handling missing values:\n", data)

# Normalization: Scaling values between 0 and 1
# Use np.where to handle division by zero in normalization
normalized_data = np.where(
    np.max(data, axis=0) - np.min(data, axis=0) == 0,
    0,  # Replace with 0 if division by zero
    (data - np.min(data, axis=0)) / (np.max(data, axis=0) - np.min(data, axis=0))
)
print("\nNormalized Data:\n", normalized_data)

# Standardization: Scaling to zero mean and unit variance
# Use np.where to handle division by zero in standardization
standardized_data = np.where(
    np.std(data, axis=0) == 0,
    0,  # Replace with 0 if division by zero
    (data - np.mean(data, axis=0)) / np.std(data, axis=0)
)
print("\nStandardized Data:\n", standardized_data)

# -------------------------------
# 3. Calculate Statistical Measures
# -------------------------------

# Mean of each column
mean = np.mean(data, axis=0)
print("\nMean of each column:", mean)

# Median of each column
median = np.median(data, axis=0)
print("Median of each column:", median)

# Standard Deviation of each column
std_dev = np.std(data, axis=0)
print("Standard Deviation of each column:", std_dev)

# Variance of each column
variance = np.var(data, axis=0)
print("Variance of each column:", variance)

# Additional Measures: Min, Max, and Sum of each column
min_values = np.min(data, axis=0)
max_values = np.max(data, axis=0)
sum_values = np.sum(data, axis=0)

print("Minimum values of each column:", min_values)
print("Maximum values of each column:", max_values)
print("Sum of each column:", sum_values)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loaded Data:
 [[      nan       nan 1.000e+03 7.900e+02 9.900e+01]
 [      nan       nan 1.200e+03 1.160e+03 9.500e+01]
 [      nan       nan 1.000e+03 9.290e+02 9.500e+01]
 [      nan 5.000e+02 9.000e+02 8.650e+02 9.000e+01]
 [      nan       nan 1.500e+03 1.140e+03 1.050e+02]
 [      nan       nan 1.000e+03 9.290e+02 1.050e+02]
 [      nan       nan 1.400e+03 1.109e+03 9.000e+01]
 [      nan       nan 1.500e+03 1.365e+03 9.200e+01]
 [      nan       nan 1.500e+03 1.112e+03 9.800e+01]
 [      nan       nan 1.600e+03 1.150e+03 9.900e+01]
 [      nan       nan 1.100e+03 9.800e+02 9.900e+01]
 [      nan       nan 1.300e+03 9.900e+02 1.010e+02]
 [      nan       nan 1.000e+03 1.112e+03 9.900e+01]
 [      nan       nan 1.600e+03 1.252e+03 9.400e+01]
 [      nan       nan 1.600e+03 1.326e+03 9.700e+01]
 [      nan       nan 1.600e+03 1.330e+03 9.700e+01]
 [      n

  col_mean = np.nanmean(data[:, i])
