In [10]:
# import required libraries
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy import stats

In [11]:
import pandas as pd
import numpy as np

# Build a reproducible synthetic "teachers rating" dataset.
# The global NumPy stream is seeded once; the draws below must stay in this
# order (gender, age group, beauty, noise) to reproduce the same records.
np.random.seed(42)

n = 100  # number of records

# categorical attributes, drawn uniformly
gender = np.random.choice(['Male', 'Female'], size=n)
age_group = np.random.choice(['Young', 'Middle-aged', 'Old'], size=n)

# beauty scores (scale 1–10): normal noise around an age-dependent mean
# ('Old', the only remaining label, falls back to a mean of 5)
mean_beauty_by_age = {'Young': 7, 'Middle-aged': 6}
beauty = np.random.normal(
    loc=[mean_beauty_by_age.get(label, 5) for label in age_group],
    scale=1,
    size=n,
).clip(1, 10)  # keep values in range

# teaching evaluation (scale 1–10): half the beauty score, a 0.3 offset for
# female records, plus standard-normal noise
female_bonus = np.where(gender == 'Female', 0.3, 0)
noise = np.random.normal(0, 1, size=n)
eval_score = np.clip(0.5 * beauty + female_bonus + noise, 1, 10)

# assemble the frame; numeric columns are rounded to two decimals
data = pd.DataFrame({
    'gender': gender,
    'age_group': age_group,
    'beauty': np.round(beauty, 2),
    'eval': np.round(eval_score, 2),
})

# persist a copy next to the notebook (no index column)
data.to_csv("teachers_rating.csv", index=False)

data

Unnamed: 0,gender,age_group,beauty,eval
0,Male,Old,3.52,1.39
1,Female,Middle-aged,5.77,3.69
2,Male,Middle-aged,6.96,6.27
3,Male,Middle-aged,5.79,2.86
4,Male,Middle-aged,5.23,2.60
...,...,...,...,...
95,Female,Young,6.97,4.92
96,Female,Middle-aged,4.06,2.83
97,Female,Young,6.15,4.44
98,Female,Young,8.36,3.81


In [13]:
# Q1. Regression with T-test: Does gender affect teaching evaluation rates?

# assuming columns are named 'eval' (teaching evaluation) and 'gender' (male/female)
# Regress the evaluation score on the categorical gender column.
model_ttest = ols('eval ~ gender', data=data).fit()

# Contrast [0, 1] skips the intercept and selects the gender coefficient,
# i.e. it tests H0: gender coefficient = 0.
ttest_result = model_ttest.t_test([0, 1])  # tests coefficient of gender variable

print("\nQ1. Regression with T-test: Does gender affect teaching evaluation rates?")
print(model_ttest.summary())

# Report the explicit t-test as well; previously it was computed but never shown.
print("\nT-test on the gender coefficient (H0: coefficient = 0):")
print(ttest_result)


Q1. Regression with T-test: Does gender affect teaching evaluation rates?
                            OLS Regression Results                            
Dep. Variable:                   eval   R-squared:                       0.037
Model:                            OLS   Adj. R-squared:                  0.027
Method:                 Least Squares   F-statistic:                     3.729
Date:                Wed, 22 Oct 2025   Prob (F-statistic):             0.0564
Time:                        17:01:44   Log-Likelihood:                -150.34
No. Observations:                 100   AIC:                             304.7
Df Residuals:                      98   BIC:                             309.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------

In [14]:
# Q2. Regression with ANOVA: Does beauty score differ by age group?

# Fit beauty on age group, treated explicitly as a categorical factor.
age_formula = 'beauty ~ C(age_group)'
model_anova = ols(age_formula, data=data).fit()

# Type-II ANOVA table for the fitted model.
anova_table = sm.stats.anova_lm(model_anova, typ=2)

q2_heading = "\nQ2. Regression with ANOVA: Does beauty score for instructors differ by age?"
print(q2_heading)
print(anova_table)


Q2. Regression with ANOVA: Does beauty score for instructors differ by age?
                  sum_sq    df          F        PR(>F)
C(age_group)   63.442375   2.0  29.813282  8.080832e-11
Residual      103.207529  97.0        NaN           NaN


In [16]:
# Q3. Correlation: Using the teachers' rating dataset, is teaching evaluation score correlated with beauty score?

# `ols` and `stats` are imported in the notebook's first cell, so they are
# not re-imported here.

# Pearson correlation answers the question directly: r gives the strength and
# direction of the linear association, and the p-value tests H0: no correlation.
pearson_r, pearson_p = stats.pearsonr(data['beauty'], data['eval'])
print(f"Pearson r = {pearson_r:.3f}, p-value = {pearson_p:.3g}")

# perform OLS regression: eval vs beauty (complements the correlation with a
# slope estimate for beauty)
model = ols('eval ~ beauty', data=data).fit()

# display summary
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   eval   R-squared:                       0.273
Model:                            OLS   Adj. R-squared:                  0.265
Method:                 Least Squares   F-statistic:                     36.77
Date:                Wed, 22 Oct 2025   Prob (F-statistic):           2.49e-08
Time:                        17:02:57   Log-Likelihood:                -136.27
No. Observations:                 100   AIC:                             276.5
Df Residuals:                      98   BIC:                             281.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.4801      0.453      1.060      0.2