In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the survey responses from the CSV next to the notebook; the later
# cells all read from this `df` frame. Echo a confirmation plus the first
# rows as a quick sanity check of the load.
df = pd.read_csv("Tourist.csv")
print("Data Loaded Successfully!", df.head(), sep="\n")

# Histogram of respondent ages, split into 10 bins.
fig, ax = plt.subplots(figsize=(8, 5))
df["Age"].plot.hist(bins=10, edgecolor="black", ax=ax)
ax.set_title("Age Distribution of Respondents")
ax.set_xlabel("Age")
ax.set_ylabel("Number of Respondents")
plt.show()

# Number of responses per destination; value_counts() returns them sorted
# from most to least frequent, which is also the bar order.
destination_counts = df["Tourist Destination Visited"].value_counts()

fig, ax = plt.subplots(figsize=(8, 5))
# rot=45 tilts the category labels on the x-axis.
destination_counts.plot.bar(ax=ax, rot=45)
ax.set_title("Tourist Destinations Visited")
ax.set_xlabel("Destination")
ax.set_ylabel("Number of Visitors")
plt.show()

# Number of responses per travel month.
month_counts = df["Month of Travel"].value_counts()

# value_counts() sorts by frequency, which scatters the months along the
# x-axis and hides any seasonal pattern. When every label is an English
# month name (full or three-letter, any case, surrounding spaces ignored)
# the bars are put in calendar order. If any label cannot be ranked, the
# original frequency order is kept untouched.
CALENDAR_MONTHS = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]
month_rank = {}
for position, month_name in enumerate(CALENDAR_MONTHS):
    month_rank[month_name.lower()] = position
    month_rank[month_name[:3].lower()] = position

label_ranks = [
    month_rank.get(str(label).strip().lower()) for label in month_counts.index
]
if label_ranks and None not in label_ranks:
    # sorted() is stable, so labels sharing a rank keep their frequency order.
    calendar_positions = sorted(range(len(label_ranks)), key=label_ranks.__getitem__)
    month_counts = month_counts.iloc[calendar_positions]

fig, ax = plt.subplots(figsize=(10, 5))
# rot=45 tilts the month labels on the x-axis.
month_counts.plot.bar(ax=ax, rot=45)
ax.set_title("Month-wise Travel Distribution")
ax.set_xlabel("Month")
ax.set_ylabel("Number of Visitors")
plt.show()

In [None]:
# Share of responses per travel season, drawn as a pie chart with
# one-decimal percentage labels on each wedge.
season_counts = df["Season of Travel"].value_counts()

fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    season_counts,
    labels=season_counts.index,
    autopct="%1.1f%%",
    startangle=90,
)
ax.set_title("Preferred Season for Travel")
plt.show()

# Histogram of the length of stay in days, split into 12 bins.
fig, ax = plt.subplots(figsize=(8, 5))
df["Duration of Stay (days)"].plot.hist(bins=12, edgecolor="black", ax=ax)
ax.set_title("Duration of Stay Distribution")
ax.set_xlabel("Number of Days")
ax.set_ylabel("Number of Visitors")
plt.show()

# Histogram of travel-group size, split into 10 bins.
fig, ax = plt.subplots(figsize=(8, 5))
df["Number of People Traveled With"].plot.hist(bins=10, edgecolor="black", ax=ax)
ax.set_title("Group Size Distribution")
ax.set_xlabel("Number of People")
ax.set_ylabel("Number of Visitors")
plt.show()

# Share of respondents per answer to the festival/event question.
festival_counts = df["Visited during Festival/Event?"].value_counts()

fig, ax = plt.subplots(figsize=(5, 5))
festival_counts.plot.pie(autopct="%1.1f%%", ax=ax)
ax.set_title("Visited During Festival/Event?")
# Blank the y-axis label so only the title describes the chart.
ax.set_ylabel("")
plt.show()

# Number of responses per stated main reason for the visit, most common
# reason first (value_counts order).
reason_counts = df["Main Reason for Visiting"].value_counts()

fig, ax = plt.subplots(figsize=(7, 4))
reason_counts.plot.bar(ax=ax)
ax.set_title("Main Reason for Visiting")
ax.set_xlabel("Reason")
ax.set_ylabel("Number of Visitors")
plt.show()

In [None]:
import seaborn as sns

# Pairwise correlations between the three numeric survey fields, printed
# as a table and then drawn as an annotated heatmap (two decimals per cell).
numeric_cols = ["Age", "Duration of Stay (days)", "Number of People Traveled With"]
numeric_df = df[numeric_cols]
corr_matrix = numeric_df.corr()
print("Correlation Matrix:", corr_matrix, sep="\n")

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", ax=ax)
ax.set_title("Correlation Matrix of Travel Data")
plt.show()

In [None]:
# Boolean frame marking every missing cell; reused for the counts, the
# percentages and the heatmap below.
null_mask = df.isnull()
missing_per_column = null_mask.sum()

print("Missing Values Count per Column:")
print(missing_per_column)

print("\nPercentage of Missing Data:")
print(missing_per_column / len(df) * 100)

# One heatmap row per record and one column per field; missing cells
# stand out against the present ones.
fig, ax = plt.subplots(figsize=(10, 5))
sns.heatmap(null_mask, cbar=False, ax=ax)
ax.set_title("Missing Data Heatmap")
plt.show()

# Strategy 1: discard every row that has at least one missing value.
df_drop = df.dropna()

# Strategy 2: keep all rows and impute instead. Numeric fields get their
# column mean; categorical fields that actually contain gaps get their most
# frequent value (first entry of mode()). Everything is collected into one
# per-column fill map and applied in a single fillna call on the original frame.
numeric_cols = ["Age", "Duration of Stay (days)", "Number of People Traveled With"]
categorical_cols = [
    "Gender",
    "Tourist Destination Visited",
    "Month of Travel",
    "Season of Travel",
    "Visited during Festival/Event?",
    "Main Reason for Visiting",
]

fill_values = {col: df[col].mean() for col in numeric_cols}
fill_values.update(
    {col: df[col].mode()[0] for col in categorical_cols if df[col].isnull().any()}
)
df_fill_mean = df.fillna(fill_values)

# Report the per-column gap counts that remain after imputation.
print("\nAfter Filling Missing Data:")
print(df_fill_mean.isnull().sum())

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay

# --- Predict the visited destination with a random forest -------------------
#
# All preprocessing happens on a copy (`model_df`) so the shared `df` frame
# keeps its original values. Imputing and label-encoding `df` in place made
# this cell fail with a KeyError when run a second time (no object columns
# were left, so `le_dict` stayed empty), and it replaced the category names
# with integer codes for any earlier plot cell that was re-run afterwards.
TARGET_COL = "Tourist Destination Visited"

model_df = df.copy()

# Numeric gaps -> column mean.
# NOTE(review): the means/modes are computed on the full data set before the
# train/test split, so the test rows influence the imputation values.
for col in ["Age", "Duration of Stay (days)", "Number of People Traveled With"]:
    model_df[col] = model_df[col].fillna(model_df[col].mean())

# Categorical gaps -> most frequent value, then integer-encode each column.
# The fitted encoders are kept in `le_dict` so codes can be mapped back to
# the original labels later.
# NOTE(review): this also mode-fills missing values of the target column;
# consider dropping rows with an unknown destination instead.
le_dict = {}
for col in model_df.select_dtypes(include="object").columns:
    filled = model_df[col].fillna(model_df[col].mode()[0])
    le = LabelEncoder()
    model_df[col] = le.fit_transform(filled)
    le_dict[col] = le

X = model_df.drop(columns=TARGET_COL)
y = model_df[TARGET_COL]

# 80/20 split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Evaluate against the complete set of destination codes. Without an explicit
# `labels` list, a destination that appears in neither y_test nor y_pred is
# left out of the confusion matrix, which then no longer lines up with the
# display labels taken from the encoder.
target_encoder = le_dict[TARGET_COL]
class_codes = list(range(len(target_encoder.classes_)))
class_names = [str(name) for name in target_encoder.classes_]

print("Classification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_codes,
        target_names=class_names,
        zero_division=0,  # classes without predictions score 0 without a warning
    )
)

cm = confusion_matrix(y_test, y_pred, labels=class_codes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot()
plt.title("Confusion Matrix for Tourist Destination Prediction")
plt.show()