In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

def load_and_prepare_data(filepath):
    """Load the purchase sheet from an Excel workbook and drop unneeded columns.

    The workbook is read with ``pd.read_excel`` using its default sheet.
    Every column whose label contains ``"Unnamed"`` is removed, along with
    the columns labelled ``'Candy'``, ``'Mango'`` and ``'Milk'`` when they
    are present.

    Args:
        filepath: Location of the workbook, in any form ``pd.read_excel``
            accepts.

    Returns:
        pandas.DataFrame: The loaded data without the dropped columns.
    """
    df = pd.read_excel(filepath)

    # Hand drop() explicit column labels rather than a filtered DataFrame, so
    # the call does not rely on implicit iteration over a frame's columns.
    unnamed_columns = df.filter(like="Unnamed").columns
    df = df.drop(columns=unnamed_columns)

    # errors="ignore" keeps the tolerance for sheets missing any of these.
    df = df.drop(columns=['Candy', 'Mango', 'Milk'], errors="ignore")
    return df

def analyze_matrix(A):
    """Report the basic shape properties and rank of a 2-D matrix.

    Args:
        A: Matrix-like object exposing ``.shape`` and accepted by
            ``np.linalg.matrix_rank``.

    Returns:
        tuple: ``(dimensionality, num_vectors, rank)`` where dimensionality
        is the number of columns, num_vectors is the number of rows, and
        rank is the value returned by ``np.linalg.matrix_rank``.
    """
    column_count = A.shape[1]
    row_count = A.shape[0]
    return column_count, row_count, np.linalg.matrix_rank(A)

def calculate_cost_of_products(A, C):
    """Estimate per-product costs from purchase quantities and payments.

    Computes ``pinv(A) @ C``, i.e. the pseudo-inverse solution ``X`` of the
    linear system ``A @ X = C``.

    Args:
        A: Matrix of purchased quantities (one row per purchase).
        C: Vector of payments, one entry per row of ``A``.

    Returns:
        The product of the pseudo-inverse of ``A`` with ``C``.
    """
    return np.linalg.pinv(A) @ C

def classify_customers(df, threshold=200):
    """Label customers as RICH or POOR by payment and split features/labels.

    A row is labelled ``'RICH'`` when its ``'Payment (Rs)'`` value is
    strictly greater than ``threshold`` and ``'POOR'`` otherwise.

    Side effect: the labels are written to ``df['Class']`` on the frame that
    was passed in, so the caller's DataFrame gains (or has overwritten) that
    column.

    Args:
        df: DataFrame containing the columns ``'Payment (Rs)'``,
            ``'Candies (#)'``, ``'Mangoes (Kg)'`` and ``'Milk Packets (#)'``.
        threshold: Payment cut-off separating the two classes. Defaults to
            200, the value that was previously hard-coded.

    Returns:
        tuple: ``(X, y)`` where ``X`` is the DataFrame of the three quantity
        columns and ``y`` is the ``'Class'`` column.
    """
    # Vectorised comparison instead of a per-row Python lambda.
    df['Class'] = np.where(df['Payment (Rs)'] > threshold, 'RICH', 'POOR')
    X = df[['Candies (#)', 'Mangoes (Kg)', 'Milk Packets (#)']]
    y = df['Class']

    return X, y

def train_classifier(X, y):
    """Fit a logistic-regression classifier on standardised features.

    The data is split 70/30 into train and test parts with
    ``random_state=42``. A ``StandardScaler`` is fitted on the training
    features only and then applied to both parts. A ``LogisticRegression``
    with default settings is trained on the scaled training data and used to
    predict the scaled test data.

    Args:
        X: Feature table.
        y: Class labels aligned with ``X``.

    Returns:
        tuple: ``(model, scaler, y_test, y_pred)`` - the fitted classifier,
        the fitted scaler, the held-out true labels and the predictions for
        the held-out rows.
    """
    features_train, features_test, labels_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Scaling statistics come from the training part only.
    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(features_train)
    scaled_test = scaler.transform(features_test)

    model = LogisticRegression()
    model.fit(scaled_train, labels_train)

    return model, scaler, y_test, model.predict(scaled_test)

def main(filepath):
    """Run the purchase-data analysis end to end and print the results.

    Steps:
      1. Load and clean the workbook at ``filepath``.
      2. Print the dimensionality, row count and rank of the quantity matrix.
      3. Print the per-product costs obtained from the pseudo-inverse.
      4. Label customers RICH/POOR, train a classifier and print its
         confusion matrix and classification report.

    Args:
        filepath: Location of the Excel workbook to analyse.
    """
    df = load_and_prepare_data(filepath)

    A = df[['Candies (#)', 'Mangoes (Kg)', 'Milk Packets (#)']]
    C = df['Payment (Rs)']

    dimensionality, num_vectors, rank = analyze_matrix(A)
    print("Dimensionality of the matrix A: ", dimensionality)
    print("Number of vectors in matrix A: ", num_vectors)
    print("Rank of the matrix A: ", rank)

    X = calculate_cost_of_products(A, C)
    print("Cost of Products:")
    print("Candies: ", round(X[0], 2))
    print("Mangoes: ", round(X[1], 2))
    print("Milk Packets: ", round(X[2], 2))

    X_features, y_labels = classify_customers(df)
    model, scaler, y_test, y_pred = train_classifier(X_features, y_labels)

    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    # zero_division=0 reports 0.0 for a class that is never predicted (same
    # numbers as the default) without emitting undefined-metric warnings.
    print(
        "\nClassification Report:\n",
        classification_report(y_test, y_pred, zero_division=0),
    )

# Run the purchase-data analysis on the lab workbook.
# NOTE(review): the workbook location is a hard-coded absolute Windows path,
# so this cell only runs on a machine that has the file at this location.
main("C:/Users/year3/Downloads/Lab Session Data.xlsx")


Dimensionality of the matrix A:  3
Number of vectors in matrix A:  10
Rank of the matrix A:  3
Cost of Products:
Candies:  1.0
Mangoes:  55.0
Milk Packets:  18.0
Confusion Matrix:
 [[2 0]
 [1 0]]

Classification Report:
               precision    recall  f1-score   support

        POOR       0.67      1.00      0.80         2
        RICH       0.00      0.00      0.00         1

    accuracy                           0.67         3
   macro avg       0.33      0.50      0.40         3
weighted avg       0.44      0.67      0.53         3



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [41]:
import pandas as pd
import statistics

def load_stock_data(filepath):
    """Read the 'IRCTC Stock Price' sheet of an Excel workbook.

    Args:
        filepath: Location of the workbook, in any form ``pd.read_excel``
            accepts.

    Returns:
        pandas.DataFrame: Contents of the ``'IRCTC Stock Price'`` sheet.
    """
    return pd.read_excel(filepath, sheet_name='IRCTC Stock Price')

def calculate_mean_variance(df):
    """Return the mean and sample variance of the fourth column of ``df``.

    The column is selected by position (index 3), converted to float and
    handed to ``statistics.mean`` and ``statistics.variance``.

    Args:
        df: DataFrame with at least four columns; the fourth must be
            convertible to float.

    Returns:
        tuple: ``(mean_price, variance_price)``.
    """
    values = df.iloc[:, 3].astype(float).tolist()
    return statistics.mean(values), statistics.variance(values)
def calculate_population_mean(df):
    """Return the mean of the fourth column of ``df`` over all rows.

    The column is selected by position (index 3) and converted to float
    before ``statistics.mean`` is applied.

    Args:
        df: DataFrame with at least four columns; the fourth must be
            convertible to float.

    Returns:
        float: Mean of the selected column.
    """
    all_prices = df.iloc[:, 3].astype(float)
    return statistics.mean(all_prices.tolist())

def filter_wednesdays(df):
    """Return only the rows of ``df`` whose ``'Date'`` falls on a Wednesday.

    Side effect: the ``'Date'`` column of the frame passed in is replaced by
    its ``pd.to_datetime`` conversion.

    Args:
        df: DataFrame with a ``'Date'`` column parseable by
            ``pd.to_datetime``.

    Returns:
        pandas.DataFrame: The subset of rows dated on a Wednesday.
    """
    df['Date'] = pd.to_datetime(df['Date'])
    # dayofweek counts from Monday=0, so Wednesday is 2.
    is_wednesday = df['Date'].dt.dayofweek == 2
    return df[is_wednesday]

def calculate_sample_mean(wednesdays_df):
    """Return the mean of the fourth column of the given sample frame.

    The column is selected by position (index 3) and converted to float
    before ``statistics.mean`` is applied.

    Args:
        wednesdays_df: DataFrame holding the sampled rows, with at least
            four columns; the fourth must be convertible to float.

    Returns:
        float: Mean of the selected column over the sampled rows.
    """
    sampled_prices = wednesdays_df.iloc[:, 3].astype(float).tolist()
    return statistics.mean(sampled_prices)
def main(filepath):
    """Print summary statistics for the stock-price sheet of a workbook.

    Prints the mean and variance of the price column, the population mean,
    the mean over Wednesday rows only, and the difference between the
    population mean and the Wednesday mean.

    Note: this definition replaces the ``main`` defined in the earlier
    purchase-data cell once this cell has been run.

    Args:
        filepath: Location of the Excel workbook to analyse.
    """
    stock_df = load_stock_data(filepath)

    mean_price, variance_price = calculate_mean_variance(stock_df)
    print(f"Mean Price: {mean_price:.2f}")
    print(f"Variance of Price: {variance_price:.2f}")

    population_mean = calculate_population_mean(stock_df)
    print(f"Population Mean Price: {population_mean:.2f}")

    sample_mean = calculate_sample_mean(filter_wednesdays(stock_df))
    print(f"Sample Mean Price for Wednesdays: {sample_mean:.2f}")
    print(f"Difference between Population Mean and Sample Mean for Wednesdays: {population_mean - sample_mean:.2f}")
# Run the stock-price statistics on the lab workbook.
# NOTE(review): the workbook location is a hard-coded absolute Windows path,
# so this cell only runs on a machine that has the file at this location.
main("C:/Users/year3/Downloads/Lab Session Data.xlsx")



Mean Price: 1560.66
Variance of Price: 58732.37
Population Mean Price: 1560.66
Sample Mean Price for Wednesdays: 1550.71
Difference between Population Mean and Sample Mean for Wednesdays: 9.96
