**Flight Price Prediction Model**

In [None]:
# Flight price regression: load flights.csv, one-hot encode the categorical
# columns, standardise the numeric ones, fit a linear regression on an 80/20
# split, report R^2 / RMSE, and persist the model plus its preprocessors.

# 1. Loading the Dataset
import pandas as pd

# Load the flights dataset
flights = pd.read_csv('flights.csv')

# 2. Data Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import numpy as np

categorical_columns = ['from', 'to', 'flightType', 'agency']
# 'age' is intentionally absent: it is not a column of the flights dataset
numerical_columns = ['time', 'distance']

# Target variable
y = flights['price'].values

# Split the raw rows BEFORE fitting any preprocessor, so the encoder and scaler
# learn their categories / mean / variance from training rows only. Fitting them
# on the full table would leak test-set information into the features.
flights_train, flights_test, y_train, y_test = train_test_split(
    flights[categorical_columns + numerical_columns],
    y,
    test_size=0.2,
    random_state=42,
)

# Handle categorical variables.
# handle_unknown='ignore' encodes a category that was not seen during fit as an
# all-zero group instead of raising, both for the test rows here and for new
# data passed to the saved encoder later.
encoder = OneHotEncoder(handle_unknown='ignore')
train_categorical = encoder.fit_transform(flights_train[categorical_columns])
test_categorical = encoder.transform(flights_test[categorical_columns])

# Scale numerical features
scaler = StandardScaler()
train_numerical = scaler.fit_transform(flights_train[numerical_columns])
test_numerical = scaler.transform(flights_test[numerical_columns])

# Combine features (the encoder output is sparse, hence .toarray())
X_train = np.concatenate([train_categorical.toarray(), train_numerical], axis=1)
X_test = np.concatenate([test_categorical.toarray(), test_numerical], axis=1)

# 3. Feature Selection
from sklearn.feature_selection import SelectKBest, f_regression

# k='all' keeps every feature, so this step currently only computes the
# f_regression scores; lower k to actually prune features.
selector = SelectKBest(f_regression, k='all')
X_train_selected = selector.fit_transform(X_train, y_train)
X_test_selected = selector.transform(X_test)

# 4. Model Training
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Train a regression model
regressor = LinearRegression()
regressor.fit(X_train_selected, y_train)

# Predict on test data
y_pred = regressor.predict(X_test_selected)

# 5. Model Validation
from sklearn.metrics import r2_score

# Calculate R^2 and RMSE.
# RMSE is taken as sqrt(MSE) rather than via the `squared=False` keyword, which
# newer scikit-learn releases no longer accept.
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f'R^2: {r2}')
print(f'RMSE: {rmse}')

# 6. Saving the Trained Model
import joblib

joblib.dump(regressor, 'flight_price_model.pkl')

# 7. Saving the Encoder and Scaler
joblib.dump(encoder, 'encoder.pkl')
joblib.dump(scaler, 'scaler.pkl')



**Gender Classification Model**

In [None]:
# Gender classification: load users.csv, one-hot encode 'company', standardise
# 'age', fit a random forest on an 80/20 split, report accuracy and a
# classification report, and persist the model plus its preprocessors.

# 1. Loading the Dataset
import pandas as pd

# Load the users dataset
users = pd.read_csv('users.csv')

# 2. Data Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
import numpy as np

# Target variable (gender)
y = users['gender']

# Encode target variable as integer class labels
gender_encoder = LabelEncoder()
y_encoded = gender_encoder.fit_transform(y)

# Split the raw rows BEFORE fitting the feature preprocessors, so the encoder
# and scaler are learned from training rows only (no test-set leakage).
users_train, users_test, y_train, y_test = train_test_split(
    users[['company', 'age']],
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Handling categorical variables.
# handle_unknown='ignore' encodes a company not seen during fit as all zeros
# instead of raising, here and when the saved encoder is reused.
company_encoder = OneHotEncoder(handle_unknown='ignore')
train_company = company_encoder.fit_transform(users_train[['company']])
test_company = company_encoder.transform(users_test[['company']])

# Scaling numerical features
scaler = StandardScaler()
train_age = scaler.fit_transform(users_train[['age']])
test_age = scaler.transform(users_test[['age']])

# Combine features (the encoder output is sparse, hence .toarray())
X_train = np.concatenate([train_company.toarray(), train_age], axis=1)
X_test = np.concatenate([test_company.toarray(), test_age], axis=1)

# 3. Model Training
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train a classification model.
# The forest is seeded so repeated runs give the same model and metrics,
# matching the seeded train/test split above.
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Predict on test data
y_pred = classifier.predict(X_test)

# 4. Model Validation
# Calculate accuracy and other metrics
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(report)

# 5. Saving the Trained Model
import joblib

joblib.dump(classifier, 'gender_classification_model.pkl')

# 6. Saving the Encoder and Scaler
joblib.dump(company_encoder, 'company_encoder.pkl')
joblib.dump(scaler, 'age_scaler.pkl')
joblib.dump(gender_encoder, 'gender_encoder.pkl')


**Travel Recommendation Model**

In [None]:
# Hotel recommendation: load hotels.csv, build a user x hotel matrix of the
# 'total' column, factorise it with Truncated SVD, report the mean squared
# reconstruction error, and persist the SVD plus the id encoders.

# 1. Loading the Dataset
import pandas as pd

# Load the hotels dataset
hotels = pd.read_csv('hotels.csv')

# 2. Data Preprocessing
from sklearn.preprocessing import LabelEncoder

# Encode hotel names and user codes as contiguous integer ids
hotel_encoder = LabelEncoder()
hotels['hotel_id'] = hotel_encoder.fit_transform(hotels['name'])

user_encoder = LabelEncoder()
hotels['user_id'] = user_encoder.fit_transform(hotels['userCode'])

# 3. Feature Engineering
data = hotels[['user_id', 'hotel_id', 'total']]

# 4. Model Training
# Using matrix factorization for collaborative filtering
from sklearn.decomposition import TruncatedSVD

# Create a matrix of users x hotels. pivot_table averages 'total' when a user
# has several rows for the same hotel; user/hotel pairs with no rows become 0.
matrix = data.pivot_table(index='user_id', columns='hotel_id', values='total').fillna(0)

# Determine the number of components: one less than the smaller dimension of
# the matrix, but never below 1 so a single-user or single-hotel dataset does
# not ask TruncatedSVD for zero components.
num_components = max(1, min(matrix.shape) - 1)

# Apply Truncated SVD for dimensionality reduction
svd = TruncatedSVD(n_components=num_components, random_state=42)
latent_matrix = svd.fit_transform(matrix)

# 5. Model Evaluation
reconstructed_matrix = svd.inverse_transform(latent_matrix)

# Mean squared error over every cell of the matrix. The subtraction is done on
# the underlying NumPy array so that .mean() yields one scalar; calling .mean()
# on the DataFrame would instead return a per-hotel Series.
squared_residuals = (matrix.to_numpy() - reconstructed_matrix) ** 2
reconstruction_error = float(squared_residuals.mean())

print(f'Reconstruction Error: {reconstruction_error}')

# 6. Saving the Trained Model
import joblib

joblib.dump(svd, 'hotel_recommendation_model.pkl')
joblib.dump(hotel_encoder, 'hotel_encoder.pkl')
joblib.dump(user_encoder, 'user_encoder.pkl')
