Write a Python program that reads a CSV file containing numerical data (e.g., a matrix) and uses NumPy to perform the following tasks:

- Load the data into a NumPy array.
- Calculate the mean and standard deviation for each column.
- Normalize the data (subtract the mean and divide by the standard deviation).
- Write the normalized data to a new CSV file.

download csv file from following link.
https://www.kaggle.com/datasets/thedevastator/higher-education-predictors-of-student-retention/download?datasetVersionNumber=2

In [15]:
import pandas as pd
import numpy as np

# Assuming 'dataset.csv' is the name of your CSV file
df = pd.read_csv('dataset.csv')

print(df)
# Select only numeric columns
numeric_columns = df.select_dtypes(include=[np.number])

# Extract numerical data and convert to NumPy array
data = numeric_columns.values

# Calculate mean and standard deviation for each column
column_means = np.mean(data, axis=0)
column_stddevs = np.std(data, axis=0)

# Normalize the data
normalized_data = (data - column_means) / column_stddevs

# Create a new DataFrame with normalized data
normalized_df = pd.DataFrame(normalized_data, columns=numeric_columns.columns)

# Add non-numeric columns back to the DataFrame
non_numeric_columns = df.select_dtypes(exclude=[np.number])
normalized_df = pd.concat([non_numeric_columns, normalized_df], axis=1)

# Write the normalized data to a new CSV file
normalized_df.to_csv('normalized_dataset.csv', index=False)


      Marital status  Application mode  Application order  Course  \
0                  1                 8                  5       2   
1                  1                 6                  1      11   
2                  1                 1                  5       5   
3                  1                 8                  2      15   
4                  2                12                  1       3   
...              ...               ...                ...     ...   
4419               1                 1                  6      15   
4420               1                 1                  2      15   
4421               1                 1                  1      12   
4422               1                 1                  1       9   
4423               1                 5                  1      15   

      Daytime/evening attendance  Previous qualification  Nacionality  \
0                              1                       1            1   
1                        

Create a Python script that processes a log file containing timestamps and events. Implement the following:

- Use a generator to read the file line by line.
- Parse the timestamp and handle date-related exceptions.
- Classify and count different types of events.
- Write the summary to a new file, including the count of each event type.

download csv file from following link. https://static.celonis.cloud/static/quickstarts/samplefiles/flight_event_log.csv

In [13]:
import csv
from datetime import datetime

def load_data(file_path):
    data = []

    def parse_timestamp(timestamp_str):
        try:
            timestamp = datetime.strptime(timestamp_str, '%m/%d/%Y %H:%M:%S')
            return timestamp
        except ValueError as e:
            print(f"Error parsing timestamp '{timestamp_str}': {e}")
            return None

    with open(file_path, 'r', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        for row in reader:
            # Parse timestamp and handle date-related exceptions
            timestamp = parse_timestamp(row.get('timestamp', '').strip())
            if timestamp is not None:
                row['timestamp'] = timestamp
                data.append(row)

    return data

def process_log_data(log_data):
    event_counts = {}

    for row in log_data:
        event = row.get('activity', '').strip()
        event_counts[event] = event_counts.get(event, 0) + 1

    return event_counts

def write_summary(output_file, event_counts):
    with open(output_file, 'w', newline='', encoding='utf-8') as summary_file:
        writer = csv.writer(summary_file)
        writer.writerow(['Event', 'Count'])
        for event, count in event_counts.items():
            writer.writerow([event, count])

if __name__ == "__main__":
    input_file = "flight_event_log.csv"
    output_file = "event_summary.csv"

    # Step 1: Load data using a generator
    log_data = load_data(input_file)

    # Step 2: Process data to classify and count events
    event_counts = process_log_data(log_data)

    # Step 3: Write the summary to a new CSV file
    write_summary(output_file, event_counts)

    print("Summary written to", output_file)


Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Error parsing timestamp '': time data '' does not match format '%m/%d/%Y %H:%M:%S'
Erro

### Given a 3D NumPy array representing a cube with random values, write a function to perform the following:

- Identify the coordinates of the maximum and minimum values.
- Create a new array that extracts a 2x2x2 sub-cube centered around the maximum value.
- Calculate the mean and standard deviation of the sub-cube.

In [6]:
import numpy as np

def analyze_cube(cube):
    # Identify coordinates of maximum and minimum values
    max_coords = np.unravel_index(np.argmax(cube), cube.shape)
    min_coords = np.unravel_index(np.argmin(cube), cube.shape)

    # Ensure that slices are within valid range
    sub_cube_slices = tuple(slice(max(max_coord - 1, 0), min(max_coord + 2, size)) for max_coord, size in zip(max_coords, cube.shape))
    
    # Extract a 2x2x2 sub-cube centered around the maximum value
    sub_cube = cube[sub_cube_slices]

    # Calculate the mean and standard deviation of the sub-cube
    sub_cube_mean = np.nanmean(sub_cube)
    sub_cube_std = np.nanstd(sub_cube)

    return max_coords, min_coords, sub_cube, sub_cube_mean, sub_cube_std

# Example usage:
# Create a 3D NumPy array representing a cube with random values
random_cube = np.random.rand(5, 5, 5)

# Call the function to analyze the cube
max_coords, min_coords, sub_cube, sub_cube_mean, sub_cube_std = analyze_cube(random_cube)

# Print the results
print("Coordinates of Maximum Value:", max_coords)
print("Coordinates of Minimum Value:", min_coords)
print("\n2x2x2 Sub-Cube Centered Around Maximum Value:")
print(sub_cube)
print("\nMean of the Sub-Cube:", sub_cube_mean)
print("Standard Deviation of the Sub-Cube:", sub_cube_std)


Coordinates of Maximum Value: (1, 0, 4)
Coordinates of Minimum Value: (1, 4, 4)

2x2x2 Sub-Cube Centered Around Maximum Value:
[[[0.76116092 0.54090211]
  [0.12827851 0.6147229 ]]

 [[0.265336   0.99778323]
  [0.1310486  0.17449373]]

 [[0.22418524 0.18114565]
  [0.91182817 0.34620151]]]

Mean of the Sub-Cube: 0.4397572137878613
Standard Deviation of the Sub-Cube: 0.3018828435014033


### Implement a Python program that uses NumPy to perform matrix multiplication. 
- Optimize the multiplication by parallelizing the computation using the numpy.einsum function. 
- Compare the performance with a regular matrix multiplication. 
- Handle any potential exceptions related to parallel processing.

In [3]:
import numpy as np
import time
from concurrent.futures import ProcessPoolExecutor, as_completed

def parallel_matrix_multiplication(A, B):
    try:
        with ProcessPoolExecutor() as executor:
            
            block_size = len(A) // executor._max_workers
            blocks_A = [A[i:i+block_size, :] for i in range(0, len(A), block_size)]
            blocks_B = [B[:, i:i+block_size] for i in range(0, len(B[0]), block_size)]

            
            futures = [executor.submit(np.einsum, 'ij,jk->ik', block_A, block_B)
                       for block_A, block_B in zip(blocks_A, blocks_B)]

            results = [future.result() for future in as_completed(futures)]

            
            result = np.concatenate(results, axis=1)

        return result

    except Exception as e:
        print(f"Error during parallel matrix multiplication: {e}")
        return None

def regular_matrix_multiplication(A, B):
    try:
        result = np.dot(A, B)
        return result

    except Exception as e:
        print(f"Error during regular matrix multiplication: {e}")
        return None

if __name__ == "__main__":
    try:
        
        A = np.random.rand(1000, 1000)
        B = np.random.rand(1000, 1000)

        
        start_time_parallel = time.time()
        result_parallel = parallel_matrix_multiplication(A, B)
        end_time_parallel = time.time()

        
        start_time_regular = time.time()
        result_regular = regular_matrix_multiplication(A, B)
        end_time_regular = time.time()

        
        if result_parallel is not None and result_regular is not None:
        
            print("Shape of Parallel Result:", result_parallel.shape)
            print("Shape of Regular Result:", result_regular.shape)

            
            if result_parallel.shape == result_regular.shape:
                print("\nShapes are compatible for comparison.")
                
                if np.allclose(result_parallel, result_regular):
                    print("\nThe results from parallel and regular matrix multiplication are approximately equal.")
                else:
                    print("\nThe results from parallel and regular matrix multiplication differ.")
            else:
                print("\nShapes are not compatible for comparison.")
        else:
            print("Matrix multiplication failed. Check for errors.")


        print("\nParallel Matrix Multiplication Time:", end_time_parallel - start_time_parallel, "seconds")
        print("Regular Matrix Multiplication Time:", end_time_regular - start_time_regular, "seconds")

    except Exception as main_exception:
        print(f"An error occurred in the main program: {main_exception}")


Shape of Parallel Result: (250, 1000)
Shape of Regular Result: (1000, 1000)

Shapes are not compatible for comparison.

Parallel Matrix Multiplication Time: 0.25842928886413574 seconds
Regular Matrix Multiplication Time: 0.1141214370727539 seconds
