# Filter Method

In [7]:
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression


file_path = "MachineLearinningDataSet.csv"  
df = pd.read_csv(file_path, index_col=0)  

target_column = 'Cereal production (metric tons)'

# Select features and target
X = df.drop(columns=[target_column])
y = df[target_column]

# Filter Method
k = 10
correlation = X.corrwith(y)
correlation_selected = correlation.abs().nlargest(k).index.tolist()

f_selector = SelectKBest(score_func=f_regression, k=k)
f_selector.fit(X, y)
f_selected = X.columns[f_selector.get_support()].tolist()

mi_selector = SelectKBest(score_func=mutual_info_regression, k=k)
mi_selector.fit(X, y)
mi_selected = X.columns[mi_selector.get_support()].tolist()

selected_features = list(set(correlation_selected + f_selected + mi_selected))
print("Filter method selected features:", selected_features)

Filter method selected features: ['Surface area (sq. km)', 'Agricultural land (sq. km)', 'Land area (sq. km)', 'Forest area (sq. km)', 'Agricultural methane emissions (thousand metric tons of CO2 equivalent)', 'Rural population', 'Arable land (hectares)', 'Agriculture, forestry, and fishing, value added (current US$)', 'Agricultural nitrous oxide emissions (thousand metric tons of CO2 equivalent)', 'Arable land (% of land area)', 'Average precipitation in depth (mm per year)', 'Agricultural land (% of land area)', 'Land under cereal production (hectares)']


# Wrapper Method

In [5]:
import pandas as pd
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression


file_path = "MachineLearinningDataSet.csv" 
df = pd.read_csv(file_path, index_col=0)  


target_column = 'Cereal production (metric tons)'
X = df.drop(columns=[target_column])
y = df[target_column]

# Wrapper Method (RFE)
k = 10
model = LinearRegression()
rfe_selector = RFE(estimator=model, n_features_to_select=k, step=1)
rfe_selector = rfe_selector.fit(X, y)

selected_features = X.columns[rfe_selector.support_].tolist()
print("Wrapper method (RFE) selected features:", selected_features)

Wrapper method (RFE) selected features: ['Access to electricity, rural (% of rural population)', 'Agricultural raw materials exports (% of merchandise exports)', 'Agricultural raw materials imports (% of merchandise imports)', 'Agriculture, forestry, and fishing, value added (% of GDP)', 'Arable land (% of land area)', 'Arable land (hectares per person)', 'Employment in agriculture (% of total employment) (modeled ILO estimate)', 'Employment in agriculture, female (% of female employment) (modeled ILO estimate)', 'Employment in agriculture, male (% of male employment) (modeled ILO estimate)', 'Permanent cropland (% of land area)']


# Hybrid Method

In [6]:
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression, RFE
from sklearn.linear_model import LinearRegression

file_path = "MachineLearinningDataSet.csv"  
df = pd.read_csv(file_path, index_col=0)  

target_column = 'Cereal production (metric tons)'

# Select features and target
X = df.drop(columns=[target_column])
y = df[target_column]

# Hybrid Method
# Step 1: Filter Method
k = 10
correlation = X.corrwith(y)
correlation_selected = correlation.abs().nlargest(k*2).index.tolist()

f_selector = SelectKBest(score_func=f_regression, k=k*2)
f_selector.fit(X, y)
f_selected = X.columns[f_selector.get_support()].tolist()

mi_selector = SelectKBest(score_func=mutual_info_regression, k=k*2)
mi_selector.fit(X, y)
mi_selected = X.columns[mi_selector.get_support()].tolist()

filter_selected = list(set(correlation_selected + f_selected + mi_selected))

# Step 2: Wrapper Method on filtered features
X_filtered = X[filter_selected]

model = LinearRegression()
rfe_selector = RFE(estimator=model, n_features_to_select=k, step=1)
rfe_selector = rfe_selector.fit(X_filtered, y)

selected_features = X_filtered.columns[rfe_selector.support_].tolist()
print("Hybrid method selected features:", selected_features)

Hybrid method selected features: ['Permanent cropland (% of land area)', 'Employment in agriculture (% of total employment) (modeled ILO estimate)', 'Agricultural raw materials imports (% of merchandise imports)', 'Access to electricity, rural (% of rural population)', 'Agricultural raw materials exports (% of merchandise exports)', 'Arable land (% of land area)', 'Employment in agriculture, male (% of male employment) (modeled ILO estimate)', 'Rural population growth (annual %)', 'Arable land (hectares per person)', 'Rural population (% of total population)']
