Read the training data from a CSV file.

In [None]:
import pandas as pd

# Load the training set from 'train.csv' (path is relative to the working
# directory) into the DataFrame used by the rest of the notebook.
df = pd.read_csv(filepath_or_buffer='train.csv')

Create a copy of the DataFrame.

In [None]:
# Rebind `df` to a copy of itself. deep=True is the pandas default, so this is
# the same operation as a bare .copy(), just spelled out.
df = df.copy(deep=True)

Check for NULL values in the DataFrame.

In [None]:
# Per-column count of missing values (`isna` is the same method as `isnull`).
df.isna().sum()

Generate summary statistics for the numerical columns.

In [None]:
# Descriptive statistics of the DataFrame; left as the last expression so the
# notebook still displays the table.
summary_stats = df.describe()
summary_stats

Visualize the number of records for each gender.

In [None]:
import seaborn as sns

# Count how many rows fall under each distinct 'gender' value, then draw one
# bar per value with its count as the height.
gender_count = df['gender'].value_counts()
gender_labels = gender_count.index
gender_totals = gender_count.values
sns.barplot(x=gender_labels, y=gender_totals)

Plot age against previously insured.

In [None]:
import matplotlib.pyplot as plt

# Scatter each row's 'age' (x axis) against its 'previously_insured' value
# (y axis).
ages = df['age']
insured_values = df['previously_insured']
plt.scatter(x=ages, y=insured_values)
plt.xlabel('Age')
plt.ylabel('Previously Insured')
plt.show()

Visualize vehicle age against damage.

In [None]:
# Count the rows for every (vehicle_age, damage) combination and plot those
# counts as grouped bars.
#
# Passing the raw columns straight to plt.bar draws one bar per row, all
# overlapping at the same few x positions: it is slow on a large table and the
# resulting picture hides the actual distribution. Aggregating first gives one
# bar per combination.
damage_by_vehicle_age = (
    df.groupby(['vehicle_age', 'damage'])
    .size()
    .unstack(fill_value=0)  # rows: vehicle_age, columns: damage values
)
ax = damage_by_vehicle_age.plot(kind='bar')
ax.set_xlabel('Vehicle Age')
ax.set_ylabel('Number of records')
ax.legend(title='Damage')
plt.show()

Convert categorical variables to numeric.

In [None]:
from sklearn.preprocessing import LabelEncoder

# Label-encode every object/category column instead of a single hard-coded
# placeholder name, so that no string column is left behind for the
# correlation heatmap and the model fits that follow.
#
# Each column gets its own fitted encoder, kept in `label_encoders` so the
# integer codes can be mapped back with `inverse_transform` later. `le` stays
# bound to the encoder of the last column processed.
label_encoders = {}
for column in df.select_dtypes(include=['object', 'category']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

Create a heatmap to visualize correlations.

In [None]:
import seaborn as sns

# Compute correlations over numeric/bool columns only. On pandas >= 2.0,
# DataFrame.corr() raises if any non-numeric column is present; selecting the
# columns explicitly works the same way on every pandas version.
numeric_df = df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
plt.show()

Split the data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Label vector is the 'target' column; the feature matrix is everything else.
y = df['target']
X = df.drop(columns=['target'])

# Hold out 20% of the rows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Train a Random Forest Classifier.

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so repeated runs build the same trees. The train/test split
# is already seeded with 42, so without this the reported metrics would still
# vary from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

Evaluate the Random Forest model using a confusion matrix and a classification report.

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the held-out features, then compare against the true labels.
predictions = model.predict(X_test)
confusion = confusion_matrix(y_true=y_test, y_pred=predictions)
report = classification_report(y_true=y_test, y_pred=predictions)

Train an XGBoost classifier.

In [None]:
import xgboost as xgb

# Fit an XGBoost classifier with library-default hyperparameters on the same
# training split used for the random forest.
xg_model = xgb.XGBClassifier()
xg_model.fit(X=X_train, y=y_train)

Evaluate the XGBoost model.

In [None]:
# Same evaluation as for the random forest: predict on the held-out features
# and compare with the true labels.
xg_predictions = xg_model.predict(X_test)
confusion_xg = confusion_matrix(y_true=y_test, y_pred=xg_predictions)
report_xg = classification_report(y_true=y_test, y_pred=xg_predictions)

Compile final results of model evaluations.

In [None]:
# Gather both models' evaluation artefacts in one mapping keyed by model name.
final_results = {
    'random_forest': {
        'confusion_matrix': confusion,
        'report': report,
    },
    'xgboost': {
        'confusion_matrix': confusion_xg,
        'report': report_xg,
    },
}