<a href="https://colab.research.google.com/github/shauryapanhale/skill2/blob/main/skill_exp_7%2C8%2C9%2C10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import (
    RFE,
    SelectKBest,
    chi2,
    f_classif,
    f_regression,
    mutual_info_regression,
)
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
dataset = pd.read_csv('/content/hifi.csv')

# Identifier and categorical columns are left out of the modelling frame
# instead of being encoded; `dataset` itself keeps every column.
columns_to_exclude = ['Student_ID', 'Field_of_Study', 'Current_Job_Level', 'Gender', 'Entrepreneurship']
filtered_data = dataset.drop(columns=columns_to_exclude)

# Label-encode 'Gender' on the raw frame.
# NOTE(review): 'Gender' was already removed from `filtered_data` above, so this
# encoding never reaches X / X_scaled -- it only rewrites `dataset['Gender']`.
# Decide whether Gender should be a feature (encode before dropping) or whether
# this step can go.
if 'Gender' in dataset.columns:
    le = LabelEncoder()
    dataset['Gender'] = le.fit_transform(dataset['Gender'])

# Target vector and feature matrix, both taken from the filtered frame.
y = filtered_data['Starting_Salary']
X = filtered_data.drop(columns='Starting_Salary')

# Zero-mean / unit-variance copy of the features (a NumPy array, so column
# names have to be looked up on `X`).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [None]:
# Filter Methods
# 1. ANOVA F-test
# The target is handled as a regression target everywhere else in this notebook
# (mutual_info_regression, LinearRegression scored with r2), so the univariate
# F-statistic must come from `f_regression`. `f_classif` would treat every
# distinct salary value as its own class label, which makes the scores
# meaningless for a continuous target.
anova_selector = SelectKBest(score_func=f_regression, k=5)
anova_selector.fit(X_scaled, y)
# X_scaled is a plain array, so map the boolean support mask back onto the
# DataFrame's column names.
selected_features_anova = X.columns[anova_selector.get_support()].tolist()

Original Features: ['Age', 'High_School_GPA', 'SAT_Score', 'University_Ranking', 'University_GPA', 'Internships_Completed', 'Projects_Completed', 'Certifications', 'Soft_Skills_Score', 'Networking_Score', 'Job_Offers', 'Starting_Salary', 'Career_Satisfaction', 'Years_to_Promotion', 'Work_Life_Balance', 'Student_ID_S00002', 'Student_ID_S00003', 'Student_ID_S00004', 'Student_ID_S00005', 'Student_ID_S00006', 'Student_ID_S00007', 'Student_ID_S00008', 'Student_ID_S00009', 'Student_ID_S00010', 'Student_ID_S00011', 'Student_ID_S00012', 'Student_ID_S00013', 'Student_ID_S00014', 'Student_ID_S00015', 'Student_ID_S00016', 'Student_ID_S00017', 'Student_ID_S00018', 'Student_ID_S00019', 'Student_ID_S00020', 'Student_ID_S00021', 'Student_ID_S00022', 'Student_ID_S00023', 'Student_ID_S00024', 'Student_ID_S00025', 'Student_ID_S00026', 'Student_ID_S00027', 'Student_ID_S00028', 'Student_ID_S00029', 'Student_ID_S00030', 'Student_ID_S00031', 'Student_ID_S00032', 'Student_ID_S00033', 'Student_ID_S00034', 'St

In [None]:
# 2. Chi-Square Test (needs non-negative features and a categorical target)
# chi2 rejects negative inputs, and the standardized X_scaled is centred on
# zero, so rescale the raw features to [0, 1] for this test only.
X_nonnegative = MinMaxScaler().fit_transform(X)

# chi2 interprets y as class labels. Bin the continuous target into quantile
# buckets so the test compares a handful of salary bands instead of treating
# every distinct salary as a separate class. `duplicates='drop'` merges bins
# whose edges coincide when the target has many tied values.
CHI2_TARGET_BINS = 4
y_binned = pd.qcut(y, q=CHI2_TARGET_BINS, labels=False, duplicates='drop')

chi2_selector = SelectKBest(score_func=chi2, k=5)
chi2_selector.fit(X_nonnegative, y_binned)
# The scaled matrix is a plain array; recover names from the DataFrame columns.
selected_features_chi2 = X.columns[chi2_selector.get_support()].tolist()

Chi-Square Filtered Features: ['Age', 'High_School_GPA', 'SAT_Score', 'University_Ranking', 'University_GPA']


In [None]:
# 3. Information Gain (Mutual Information)
# mutual_info_regression is stochastic unless it is given a random_state, so an
# unseeded run can pick different features each time. Pin the seed to keep
# Restart & Run All reproducible.
MI_RANDOM_STATE = 42


def _seeded_mutual_info(features, target):
    """Score each feature by mutual information with the target, using a fixed seed.

    SelectKBest calls its score function as ``score_func(X, y)`` and accepts a
    single array of scores, which is what ``mutual_info_regression`` returns.
    """
    return mutual_info_regression(features, target, random_state=MI_RANDOM_STATE)


info_gain_selector = SelectKBest(score_func=_seeded_mutual_info, k=5)
info_gain_selector.fit(X_scaled, y)
# Map the boolean support mask back onto the DataFrame's column names.
selected_features_info_gain = X.columns[info_gain_selector.get_support()].tolist()


Mutual Information Filtered Features: ['Age', 'High_School_GPA', 'SAT_Score', 'University_Ranking', 'University_GPA']


In [None]:
# 4. Pearson's Correlation
# Pairwise Pearson coefficients between every column kept in `filtered_data`.
correlation_matrix = filtered_data.corr(method='pearson')
# Take the target's column and order it from most positive to most negative.
# The target's own entry (correlation with itself) is part of this Series.
salary_correlations = correlation_matrix.loc[:, 'Starting_Salary']
pearson_correlation = salary_correlations.sort_values(ascending=False)

In [None]:
# Wrapper Methods
# 1. Forward Selection (Stepwise Regression)
# Sequential forward search around a linear model: 5 features are requested,
# candidates are scored by R^2 under 5-fold cross-validation, and the floating
# variant is switched off.
forward_selector = SFS(
    estimator=LinearRegression(),
    k_features=5,
    forward=True,
    floating=False,
    scoring='r2',
    cv=5,
).fit(X, y)

# X is passed as a DataFrame and the selector's `k_feature_names_` tuple is
# turned into a list for reporting.
selected_features_forward = [*forward_selector.k_feature_names_]

In [None]:
# 2. Backward Selection
# Same sequential search as the forward pass but with forward=False, i.e. run
# in the backward direction, still targeting 5 features scored by
# cross-validated R^2 (5 folds) on a linear model.
backward_selector = SFS(
    estimator=LinearRegression(),
    k_features=5,
    forward=False,
    floating=False,
    scoring='r2',
    cv=5,
).fit(X, y)

# Convert the selector's `k_feature_names_` tuple into a list.
selected_features_backward = [*backward_selector.k_feature_names_]


In [None]:
# Results
# `important_features_rft` is not assigned by any earlier visible cell, so the
# summary below raised NameError on a fresh kernel. Compute it here using the
# RFE and RandomForestRegressor imports that were otherwise unused: recursive
# feature elimination around a seeded random forest, keeping 5 features like
# the other selectors.
# NOTE(review): confirm this matches the method originally meant by "RFT".
rft_selector = RFE(
    estimator=RandomForestRegressor(random_state=42),
    n_features_to_select=5,
)
rft_selector.fit(X, y)
important_features_rft = X.columns[rft_selector.get_support()].tolist()

# Side-by-side summary of every selection method; as the cell's last
# expression, the dict is what the notebook displays.
{
    "Filter Method (ANOVA)": selected_features_anova,
    "Filter Method (Chi-Square)": selected_features_chi2,
    "Filter Method (Information Gain)": selected_features_info_gain,
    "Filter Method (Pearson Correlation)": pearson_correlation,
    "Wrapper Method (Forward Selection)": selected_features_forward,
    "Wrapper Method (Backward Selection)": selected_features_backward,
    "Wrapper Method (RFT)": important_features_rft,
}
