In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from hmmlearn import hmm

In [2]:
data = {
    'Route': ['A', 'B', 'C', 'A', 'C', 'B'],
    'Passengers': [100, 150, 120, 80, 130, 110],
    'Distance': [5.2, 8.3, 3.5, 4.0, 3.8, 7.1],
    'Delay': [10, 5, 15, 8, 12, 6]
}

transport_df = pd.DataFrame(data)
transport_df.head()

Unnamed: 0,Route,Passengers,Distance,Delay
0,A,100,5.2,10
1,B,150,8.3,5
2,C,120,3.5,15
3,A,80,4.0,8
4,C,130,3.8,12


In [3]:
# # Optional
# transport_df.to_excel('transport_df.xlsx')
# transport_df = pd.read_excel('transport_df.xlsx',index_col=0) # be sure to use index_col=0
# transport_df.head()

Unnamed: 0,Route,Passengers,Distance,Delay
0,A,100,5.2,10
1,B,150,8.3,5
2,C,120,3.5,15
3,A,80,4.0,8
4,C,130,3.8,12


# Supervised Learning

## Linear Regression

In [4]:
X = transport_df[['Distance', 'Delay']]
y = transport_df['Passengers']

model = LinearRegression()
model.fit(X, y)
predicted_passengers = model.predict([[4.5, 9]])
print("Linear Regression Predicted Passengers:", predicted_passengers)

Linear Regression Predicted Passengers: [97.88631971]




## Logistic Regression

In [5]:
X = transport_df[['Distance', 'Delay']]
y = transport_df['Route']

model = LogisticRegression()
model.fit(X, y)
predicted_route = model.predict([[5.0, 10]])
print("Logistic Regression Predicted Route:", predicted_route)

Logistic Regression Predicted Route: ['A']




## Naive-Bayes

In [6]:
X = transport_df[['Distance', 'Delay']]
y = transport_df['Route']

model = GaussianNB()
model.fit(X, y)
predicted_route = model.predict([[6.0, 12]])
print("Naive Bayes Predicted Route:", predicted_route)


Naive Bayes Predicted Route: ['A']




## Random Forest

In [7]:
X = transport_df[['Distance', 'Delay']]
y = transport_df['Route']

model = RandomForestClassifier()
model.fit(X, y)
predicted_route = model.predict([[7.0, 15]])
print("Random Forest Predicted Route:", predicted_route)


Random Forest Predicted Route: ['B']




## Decision-Tree Classifier

In [8]:
X = transport_df[['Distance', 'Delay']]
y = transport_df['Route']

model = DecisionTreeClassifier()
model.fit(X, y)
predicted_route = model.predict([[3.5, 7]])
print("Decision Tree Predicted Route:", predicted_route)


Decision Tree Predicted Route: ['C']




## KNN Classifier

In [9]:
X = transport_df[['Distance', 'Delay']]
y = transport_df['Route']

model = KNeighborsClassifier()
model.fit(X, y)
predicted_route = model.predict([[4.5, 9]])
print("KNN Predicted Route:", predicted_route)

KNN Predicted Route: ['A']


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


# Unsupervised Learning

## K-means Clustering

In [10]:
X = transport_df[['Distance', 'Delay']]

model = KMeans(n_clusters=2)
model.fit(X)
clusters = model.labels_
print("K-Means Clusters:", clusters)

K-Means Clusters: [0 1 0 1 0 1]


# Principal Component Analysis

In [11]:
X = transport_df[['Passengers', 'Distance', 'Delay']]

model = PCA(n_components=2)
transformed_data = model.fit_transform(X)
print("PCA Transformed Data:\n", transformed_data)


PCA Transformed Data:
 [[-14.99989687   0.27910305]
 [ 35.13301976  -4.28281154]
 [  4.86563879   6.04333455]
 [-35.00828768  -1.57427312]
 [ 14.90166478   3.42838397]
 [ -4.89213878  -3.8937369 ]]


# Hidden Markov Models

In [12]:
X = transport_df[['Distance', 'Delay']]
X = X.values.reshape(-1, 2)

model = hmm.GaussianHMM(n_components=2)
model.fit(X)
predicted_states = model.predict(X)
print("HMM Predicted States:", predicted_states)


HMM Predicted States: [0 1 0 0 0 1]
