<a href="https://colab.research.google.com/github/solomontessema/Introduction-to-Python-with-Colab/blob/master/notebooks/Day_10_Introduction_to_Machine_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<table>
  <tr>
    <td><img src="https://ionnova.com/img/ionnova_logo_name_2.png" width="120px"></td>
    <td><h1>Day 10: Introduction to Machine Learning</h1></td>
  </tr>
</table>

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the house-price CSV into a DataFrame
csv_path = '/content/house_prices.csv'
df = pd.read_csv(csv_path)

# Print only the living-area and price columns
preview_columns = ['sqft_living', 'price']
print(df[preview_columns])


In [None]:
# Scatter plot: sqft_living vs price
# pyplot is imported in this cell because this is the first cell that uses
# `plt`; without it a fresh-kernel run stops here with a NameError.
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
# alpha=0.5 makes the markers semi-transparent so overlapping points stay visible
plt.scatter(df['sqft_living'], df['price'], alpha=0.5, color='teal')
plt.xlabel('Living Area (sqft)')
plt.ylabel('House Price ($)')
plt.title('House Price vs Living Area')
plt.grid(True)
plt.show()


In [None]:
# Use only one feature: sqft_living
# X stays two-dimensional (double brackets) because the estimator expects a
# feature matrix; y is the price column.
X = df[['sqft_living']]
y = df['price']

# Split data: 20% held out for testing, fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out rows
y_pred = model.predict(X_test)
# mean_squared_error returns the mean squared error, so label it "MSE"
print("MSE:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

In [None]:
# Read the living area from the user as a number
sqft = float(input("Enter square footage of living space: "))

# Wrap the value in a one-row DataFrame whose column name matches the
# feature the model was trained on
user_df = pd.DataFrame({'sqft_living': [sqft]})

# The model returns one prediction per row; take the only one
predictions = model.predict(user_df)
predicted_price = predictions[0]

# Report the prediction with thousands separators and two decimals
print(f"\nPredicted House Price: ${predicted_price:,.2f}")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Read the house-price CSV again so this cell starts from the full table
df = pd.read_csv('/content/house_prices.csv')

# Three predictor columns and the price column as the target
feature_columns = ['sqft_living', 'grade', 'bathrooms']
X, y = df[feature_columns], df['price']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Fit a linear regression on the training rows
model = LinearRegression().fit(X_train, y_train)

# Score the predictions on the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("MSE:", mse)
print("R² Score:", r2)


In [None]:
# Collect the three feature values from the user as numbers
sqft = float(input("Enter square footage of living space: "))
grade = float(input("Enter grade (1–13): "))
bath = float(input("Enter number of bathrooms: "))

# One-row DataFrame; the columns are in the same order used to train the model
user_df = pd.DataFrame({
    'sqft_living': [sqft],
    'grade': [grade],
    'bathrooms': [bath],
})

# The model returns one prediction per row; take the only one
predictions = model.predict(user_df)
predicted_price = predictions[0]

# Report the prediction with thousands separators and two decimals
print(f"\nPredicted House Price: ${predicted_price:,.2f}")



In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the mall-customer CSV straight from its URL
url = 'https://raw.githubusercontent.com/kennedykwangari/Mall-Customer-Segmentation-Data/master/Mall_Customers.csv'
df = pd.read_csv(url)

# Cluster on annual income and spending score, standardised so both
# columns contribute on the same scale
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df[feature_columns]
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Fit KMeans with five clusters (fixed seed) and store each row's label
kmeans = KMeans(n_clusters=5, random_state=42)
kmeans.fit(X_scaled)
df['Cluster'] = kmeans.labels_

# Plot the customers in the original units, coloured by cluster label
sns.scatterplot(
    data=df,
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='Cluster',
    palette='Set2',
)
plt.title("Customer Segments")
plt.show()


In [None]:
import kagglehub

# Download the latest version of the dataset through kagglehub
# NOTE(review): earlier cells read '/content/house_prices.csv' directly;
# confirm whether this download is meant to supply that file.
dataset_handle = "alyelbadry/house-pricing-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)