Load training, test, and country information data from CSV files into Pandas DataFrames.

In [None]:
import pandas as pd

# Read the three input tables; the relative paths resolve against the
# notebook's current working directory.
train_data, test_data, country_info = (
    pd.read_csv(csv_path)
    for csv_path in ('train_data.csv', 'test_data.csv', 'country_info.csv')
)

Combine the training, test, and country DataFrames into a single DataFrame.

In [None]:
# Place the three tables side by side (column-wise), aligned on their index.
# NOTE(review): a column-wise concat pairs row i of train with row i of test
# and of country_info, and keeps any column names the tables share as
# duplicates. If the intent is to stack train/test rows and join country
# attributes on a key, this needs a row-wise concat plus a merge — confirm
# against the actual schemas.
data_frames = pd.concat(
    [train_data, test_data, country_info],
    axis="columns",
)

Select the 10 highest-scoring features with SelectKBest, using the ANOVA F-test (`f_classif`) as the scoring function.

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

# Separate the label column from the candidate predictors.
y = data_frames['target']
X = data_frames.drop(columns='target')

# Score every predictor against the label with f_classif and keep the 10 best.
# NOTE(review): the selector is fit on all rows before the train/test split
# performed later in the notebook, so held-out rows influence which features
# are chosen — consider fitting on the training split only.
selector = SelectKBest(f_classif, k=10).fit(X, y)

# Integer column positions, relative to X, of the retained features.
selected_features = selector.get_support(indices=True)

Split the selected features and target variable into training (80%) and test (20%) sets, using a fixed random seed so the split is reproducible.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
#
# `selected_features` holds column positions within X (the frame with
# 'target' already dropped), so the positions must be applied to X rather
# than to `data_frames`. Indexing the full frame would shift every position
# at or after the 'target' column by one and could select 'target' itself
# as a feature.
X_train, X_test, y_train, y_test = train_test_split(
    X.iloc[:, selected_features],
    y,
    test_size=0.2,
    random_state=42,
)

Initialize and train a LightGBM model using the training dataset.

In [None]:
import lightgbm as lgb

# Fit a LightGBM classifier, constructed with no explicit hyperparameters,
# on the training split. `fit` returns the estimator itself, so the
# construction and training can be chained.
model = lgb.LGBMClassifier().fit(X_train, y_train)

# Leave the fitted estimator as the cell's last expression so it is displayed.
model

Make predictions on the test dataset using the trained model.

In [None]:
# Predict a class label for every row of the held-out feature matrix using
# the classifier fitted in the previous cell.
predictions = model.predict(X_test)

Visualize the predictions by plotting the actual vs predicted values.

In [None]:
import matplotlib.pyplot as plt

# Scatter the predicted labels against the true labels of the held-out split,
# using the explicit Axes interface instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(y_test, predictions)
ax.set(xlabel='Actual', ylabel='Predicted', title='Predictions')
plt.show()

Evaluate the model's performance using a classification report.

In [None]:
from sklearn.metrics import classification_report

# Build the text summary of classification metrics for the held-out split.
# It is printed rather than left as the cell's last expression so that the
# embedded newlines render as a table.
report = classification_report(y_true=y_test, y_pred=predictions)
print(report)

Display a histogram of feature importances from the trained model.

In [None]:
# Histogram of the fitted model's per-feature importance scores.
# A histogram bins the importance *values*, so the x-axis is the importance
# score and the y-axis is how many features fall into each bin. The previous
# labels ('Feature Index' / 'Importance') described a per-feature bar chart
# and did not match what is drawn.
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(model.feature_importances_, bins=10)
ax.set_title('Feature Importance')
ax.set_xlabel('Importance')
ax.set_ylabel('Number of Features')
plt.show()

Compute learning-curve data for the model: scores at ten training-set sizes, evenly spaced from 10% to 100% of the training data.

In [None]:
import numpy as np
from sklearn.model_selection import learning_curve

# Fractions of the training data to evaluate: 10%, 20%, ..., 100%.
size_fractions = np.linspace(0.1, 1.0, 10)

# LGBMClassifier has no `learning_curve` method (the original call raised
# AttributeError). scikit-learn provides it as a function that takes the
# estimator as its first argument, refits clones of it on growing subsets,
# and scores them with its default cross-validation.
#
# It returns three arrays: the absolute number of training samples used at
# each step, then the training and validation scores with one row per step
# and one column per fold. `train_sizes` is bound to the absolute sizes
# because scikit-learn may drop duplicate steps, so this is the array that
# is guaranteed to line up with the score rows when plotting.
train_sizes, train_scores, test_scores = learning_curve(
    model,
    X_train,
    y_train,
    train_sizes=size_fractions,
)

Plot the learning curve showing training and test scores as training size increases.

In [None]:
# Draw one line per score matrix, averaging each row across axis 1
# (presumably the cross-validation folds — confirm against how the scores
# were produced) so there is one point per training size.
fig, ax = plt.subplots(figsize=(10, 5))
for scores, label in (
    (train_scores, 'Training Score'),
    (test_scores, 'Test Score'),
):
    ax.plot(train_sizes, scores.mean(axis=1), label=label)
ax.set(title='Learning Curve', xlabel='Training Size', ylabel='Score')
ax.legend()
plt.show()