In [None]:
import pandas as pd
import statsmodels.api as sm

# Sample observations: study hours, temperature, and the resulting exam score
data = dict(
    hours=[5, 8, 3, 7, 6, 4, 9, 2, 10, 6],
    temperature=[25, 20, 30, 15, 18, 28, 22, 35, 19, 25],
    score=[60, 75, 50, 70, 65, 55, 80, 45, 85, 68],
)
df = pd.DataFrame(data)

# Design matrix: the two predictors plus an intercept column ('const')
predictors = df.loc[:, ['hours', 'temperature']]
X = sm.add_constant(predictors)

# Ordinary least squares regression of score on hours and temperature
ols_spec = sm.OLS(endog=df['score'], exog=X)
model = ols_spec.fit()

# Show the full regression report
print(model.summary())

# Scenario to predict: study hours held fixed while the temperature varies
fixed_hours = 2  # fixed value for the number of study hours
new_temperatures = [40, 45]

# Prediction inputs use the same column layout as X; the scalar entries are
# broadcast so every temperature gets its own row
new_data = dict(const=1, hours=fixed_hours, temperature=new_temperatures)
new_X = pd.DataFrame(new_data)

# Predicted exam score for each row of new_X
predictions = model.predict(new_X)

# Report one predicted score per temperature
for temp, pred in zip(new_temperatures, predictions):
    print(f'Predicted score for temperature {temp}: {pred:.2f}')


                            OLS Regression Results                            
Dep. Variable:                  score   R-squared:                       0.995
Model:                            OLS   Adj. R-squared:                  0.993
Method:                 Least Squares   F-statistic:                     658.6
Date:                Fri, 15 Dec 2023   Prob (F-statistic):           1.07e-08
Time:                        20:25:28   Log-Likelihood:                -13.057
No. Observations:                  10   AIC:                             32.11
Df Residuals:                       7   BIC:                             33.02
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          34.1545      3.541      9.645      



In [None]:
pip install pandas statsmodels




In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler

# Create a DataFrame with the given data (None marks a missing value)
data2 = {
    'Experience': [5, 3, None, 2, 4, 6, 8, 1, 9, 5],
    'Performance': ['Excellent', 'Good', 'Excellent', None, 'Good', 'Excellent', 'Excellent', 'Poor', 'Excellent', 'Good'],
    'Department': ['Sales', 'Marketing', 'Finance', 'HR', None, 'Engineering', 'Marketing', 'Finance', 'HR', 'Engineering'],
    'Salary': [60000, 45000, 80000, 35000, 55000, 70000, 85000, 30000, 90000, 60000],
    'Promotion': ['Eligible', 'Ineligible', 'Eligible', 'Ineligible', 'Ineligible', 'Eligible', 'Eligible', 'Ineligible', 'Eligible', 'Ineligible']
}

df2 = pd.DataFrame(data2)

# Columns that are treated as categorical predictors below
categorical_cols = ['Performance', 'Department']

# Step 1: Handling Missing Values
# Numeric column: replace missing entries with the column median.
# fit_transform returns an (n, 1) array, so flatten it before assigning it
# to a single column.
imputer = SimpleImputer(strategy='median')
df2['Experience'] = imputer.fit_transform(df2[['Experience']]).ravel()

# Categorical columns: replace missing entries with the most frequent value.
# Assign the result back instead of calling fillna(inplace=True) on a column
# selection; the chained in-place form is deprecated in pandas and does not
# update df2 under Copy-on-Write.
for col in categorical_cols:
    df2[col] = df2[col].fillna(df2[col].mode()[0])

# Step 2: Encoding Categorical Variables
# One-hot encode, dropping the first level of each feature. The encoder's
# default sparse result is densified with .toarray(), which avoids the
# `sparse=` keyword (renamed to `sparse_output=` in newer scikit-learn).
encoder = OneHotEncoder(drop='first')
encoded_categorical = pd.DataFrame(
    encoder.fit_transform(df2[categorical_cols]).toarray(),
    columns=encoder.get_feature_names_out(categorical_cols),
    index=df2.index,  # keep rows aligned with df2 for the concat below
)
df2 = pd.concat([df2.drop(columns=categorical_cols), encoded_categorical], axis=1)

# Encode the Promotion labels as integers
label_encoder = LabelEncoder()
df2['Promotion'] = label_encoder.fit_transform(df2['Promotion'])

# Step 3: Scaling Numerical Features
scaler = StandardScaler()
df2[['Experience', 'Salary']] = scaler.fit_transform(df2[['Experience', 'Salary']])

# Display the preprocessed DataFrame
print(df2)



   Experience    Salary  Promotion  Performance_Good  Performance_Poor  \
0    0.084819 -0.051367          0               0.0               0.0   
1   -0.763370 -0.821865          1               1.0               0.0   
2    0.084819  0.975964          0               0.0               0.0   
3   -1.187465 -1.335530          1               0.0               0.0   
4   -0.339276 -0.308199          1               1.0               0.0   
5    0.508913  0.462299          0               0.0               0.0   
6    1.357102  1.232797          0               0.0               0.0   
7   -1.611559 -1.592363          1               0.0               1.0   
8    1.781197  1.489630          0               0.0               0.0   
9    0.084819 -0.051367          1               1.0               0.0   

   Department_Finance  Department_HR  Department_Marketing  Department_Sales  
0                 0.0            0.0                   0.0               1.0  
1                 0.0      



In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

# Create a DataFrame with the given data
data3 = {
    'Study Hours per Day': [6, 10, 7, 12, 9, 10],
    'Assignments Grade': ['L', 'L', 'M', 'M', 'H', 'H'],
    'Lectures Attended': ['F', 'T', 'F', 'T', 'F', 'T'],
    'Quiz Grade': ['U', 'S', 'U', 'S', 'U', 'S'],
    'Status': ['Fail', 'Pass', 'Fail', 'Pass', 'Pass', 'Pass']
}

df3 = pd.DataFrame(data3)

# Encoding categorical variables
# Each column gets its OWN fitted LabelEncoder. Re-fitting a single encoder on
# every column leaves it fitted on the last column only ('Status'), so a later
# transform of 'L' / 'T' / 'S' raises ValueError (unseen labels).
feature_categoricals = ['Assignments Grade', 'Lectures Attended', 'Quiz Grade']
encoders = {}
for col in feature_categoricals + ['Status']:
    encoders[col] = LabelEncoder()
    df3[col] = encoders[col].fit_transform(df3[col])

# Keep the original name bound to the target encoder; it is used below to
# turn the predicted class index back into 'Fail' / 'Pass'.
label_encoder = encoders['Status']

# Features and target variable
X2 = df3.drop('Status', axis=1)
y2 = df3['Status']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.2, random_state=42)

# Train a Decision Tree Classifier
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Evaluate the classifier on the test set
accuracy = classifier.score(X_test, y_test)
print(f'Accuracy on the test set: {accuracy:.2%}')

# Classify a random student with features (L, T, S) and 8 study hours per day
new_student = pd.DataFrame({'Study Hours per Day': [8], 'Assignments Grade': ['L'], 'Lectures Attended': ['T'], 'Quiz Grade': ['S']})
# Encode each categorical feature with the encoder fitted on that same column
for col in feature_categoricals:
    new_student[col] = encoders[col].transform(new_student[col])
# Present the columns in the same order the classifier was trained on
new_student = new_student[X2.columns]

prediction = classifier.predict(new_student)
print(f'Prediction for the new student: {label_encoder.inverse_transform(prediction)[0]}')


Accuracy on the test set: 100.00%


ValueError: ignored