In [9]:
# Assignment 2 (Test of Significance, Regression, and Optimization)
# I started by importing required libraries
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
from scipy.optimize import linprog

In [10]:
# SECTION 1 is all about Hypothesis Testing
# Hypothesis Test for Reduction in Assembly Time

# The given values for assembly time
original_mean = 35           #This is the average time before training
std_dev = 5                  # Standard deviation of assembly times
sample_mean_time = 33        # Sample mean time post-training
sample_size = 40             # A sample size
alpha = 0.05                 # The significance level for test

# Computing the z-score
z_score = (sample_mean_time - original_mean) / (std_dev / np.sqrt(sample_size))

# Finding the p-value (one-tailed test)
p_val = stats.norm.cdf(z_score)

# Making a decision based on the p-value and significance level
if p_val < alpha:
    conclusion = "Reject null hypothesis: significant reduction in assembly time."
else:
    conclusion = "Fail to reject null hypothesis: no significant reduction in assembly time."

print("Hypothesis Test for Assembly Time")
print(f"Z-score = {z_score}")
print(f"P-value = {p_val}")
print(f"Conclusion = {conclusion}")
print("\n")


Hypothesis Test for Assembly Time
Z-score = -2.5298221281347035
P-value = 0.005706018193000823
Conclusion = Reject null hypothesis: significant reduction in assembly time.




The goal was to test if the assembly time significantly decreased after a training program.
	•	Z-score: -2.5290
	•	P-value: 0.00576

Since the p-value is less than the significance level (0.05), we reject the null hypothesis. This result indicates a significant reduction in assembly time following the training.


In [14]:
#Hypothesis Test for Graduate Student Study Hours 
#Given values for student study hours
avg_study_hours = 25         
sample_size_students = 15   
sample_mean_hours = 27       
sample_std_dev = 4.5         

#Calculating the t-score
t_stat = (sample_mean_hours - avg_study_hours) / (sample_std_dev / np.sqrt(sample_size_students))
df = sample_size_students - 1  

# Calculating the one-tailed p-value using the t-distribution
p_value_study_hours = 1 - stats.t.cdf(t_stat, df=df)

# Conclusion for the hypothesis test is decided
if p_value_study_hours < alpha:
    result = "Reject null hypothesis: graduate students study significantly more than 25 hours per week."
else:
    result = "Fail to reject null hypothesis: insufficient evidence that study hours exceed 25 hours."

print("Graduate Student Study Hours Hypothesis Test")
print(f"T-statistic = {t_stat}")
print(f"P-value = {p_value_study_hours}")
print(f"Conclusion = {result}")
print("\n")

Graduate Student Study Hours Hypothesis Test
T-statistic = 1.7213259316477407
P-value = 0.05360191367469436
Conclusion = Fail to reject null hypothesis: insufficient evidence that study hours exceed 25 hours.




The objective was to determine if graduate students study, on average, more than 25 hours per week.

	•	T-score: 1.7213
	•	P-value: 0.0536

Since the p-value is greater than the significance level (0.05), we fail to reject the null hypothesis. This result suggests insufficient evidence to conclude that graduate students study more than 25 hours per week.


In [19]:
#SECTION 2 Regression Analysis
# Data for hours of study and exam scores stored in python lists
hours_study = [5, 8, 5, 7, 7, 8, 3, 8, 0, 8, 5, 0, 2, 8, 7, 7, 4, 4, 2, 6, 1, 7, 6, 2, 8, 0, 5, 8, 7, 4]
scores = [52.1221, 82.1221, 52.1221, 72.1221, 72.1221, 82.1221, 32.1221, 82.1221, 2.122104, 82.1221, 
          52.1221, 2.122104, 22.1221, 82.1221, 72.1221, 72.1221, 42.1221, 42.1221, 22.1221, 62.1221, 
          12.1221, 72.1221, 62.1221, 22.1221, 82.1221, 2.122104, 52.1221, 82.1221, 72.1221, 42.1221]

#Creating DataFrame for regression data
df = pd.DataFrame({'Study Hours': hours_study, 'Scores': scores})

#Adding a constant to predictor variables 
X = sm.add_constant(df['Study Hours']) 
Y = df['Scores']

# Fitting my model and displaying a summary
regression_model = sm.OLS(Y, X).fit()

print("Regression Analysis Results")
print(regression_model.summary())
print("\n")


Regression Analysis Results
                            OLS Regression Results                            
Dep. Variable:                 Scores   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 2.311e+16
Date:                Wed, 30 Oct 2024   Prob (F-statistic):          2.20e-210
Time:                        17:52:30   Log-Likelihood:                 374.09
No. Observations:                  30   AIC:                            -744.2
Df Residuals:                      28   BIC:                            -741.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           2.1221

The objective was to examine the relationship between hours of study and exam scores using a simple linear regression model.

Regression Model Summary:

	•	Dependent Variable: Exam Score
	•	Independent Variable: Study Hours
	•	R-squared: 1.000
	•	This value indicates that the model explains 100% of the variance in exam scores, suggesting a perfect linear relationship between study hours and scores.

Coefficients:

	•	Intercept (Constant): 2.1221
	•	Study Hours Coefficient: 10.0000
	•	For every additional hour of study, the exam score increases by 10 points.

Statistical Significance:

	•	p-values for both the intercept and study hours coefficient are 0.000, indicating that both are statistically significant at any reasonable significance level.

Model Diagnostics:

	•	F-statistic: 2.31e+16 with a p-value of 2.20e-210, confirming that the overall model fit is statistically significant.
	•	Durbin-Watson: 2.189, suggesting no significant autocorrelation in residuals.


In [8]:
# SECTION 3 is linear 0ptimization
# We use negative coefficients for maximization in linprog
objective = [-1, -1, -1]  

# Machine hour constraints for each product X and Y machine hours per unit
constraints = [
    [4, 3, 2],   #machine Y hours required for every product
    [2, 1, 3],  #machine X hours required for each product
   
]
available_hours = [100, 85]  #This are the available hours for Machines X and Y

# These are bounds for each variable 
bounds = [(0, None), (0, None), (0, None)]

#Solving the linear programming problem
lp_result = linprog(c=objective, A_ub=constraints, b_ub=available_hours, bounds=bounds, method='highs')

#Displaying results if the optimization is successful
print("Linear Optimization Results Are")
if lp_result.success:
    print(f"Optimal Production Units of Product A = {lp_result.x[0]:.2f}")
    print(f"Optimal Production Units of Product C = {lp_result.x[2]:.2f}")
    print(f"Maximum Production Output = {-lp_result.fun:.2f}")
    print(f"Optimal Production Units of Product B = {lp_result.x[1]:.2f}")    
else:
    print("The optimization failed to find a solution.")

Linear Optimization Results Are
Optimal Production Units of Product A = 0.00
Optimal Production Units of Product C = 22.14
Maximum Production Output = 40.71
Optimal Production Units of Product B = 18.57


The objective was to maximize production output for three products (A, B, and C) given the constraints on machine hours for Machine X and Machine Y.

Problem Setup:

	Objective Coefficients: [-1, -1, -1] (used as negatives to maximize in linprog)
	Constraints:
	•	Product A requires 2 hours on Machine X and 4 hours on Machine Y.
	•	Product B requires 1 hour on Machine X and 3 hours on Machine Y.
	•	Product C requires 3 hours on Machine X and 2 hours on Machine Y.
	Available Hours:
	•	Machine X: 100 hours
	•	Machine Y: 85 hours

    Results:

	Optimal Production Units:
	•	Product A: 0.00 units
	•	Product B: 18.57 units
	•	Product C: 22.14 units
	Maximum Production Output: 40.71 units

This result suggests that to maximize output given the machine constraints, the company should focus on producing Products B and C, with no production of Product A.
