Title: Popular Classification Algorithms


K-Nearest Neighbors (KNN)

Task 1: Classify fruits based on weight and color.

In [None]:

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Toy fruit dataset, written one (weight, color, fruit) record per row
# (replace with your actual dataset).
# Color codes: 1 = Red, 2 = Green, 3 = Yellow.
# NOTE(review): this notebook section is headed KNN, but the cell below trains
# a random forest -- confirm which algorithm is intended.
data = pd.DataFrame(
    [
        (150, 1, 'Apple'),
        (170, 1, 'Apple'),
        (200, 1, 'Apple'),
        (100, 2, 'Honeydew'),
        (120, 2, 'Honeydew'),
        (180, 2, 'Honeydew'),
        (220, 2, 'Honeydew'),
        (140, 3, 'Banana'),
        (160, 3, 'Banana'),
        (190, 3, 'Banana'),
    ],
    columns=['weight', 'color', 'fruit'],
)

# Encode the fruit names as integer class labels (Apple=0, Honeydew=1, Banana=2)
fruit_map = {name: code for code, name in enumerate(['Apple', 'Honeydew', 'Banana'])}
data['fruit'] = data['fruit'].map(fruit_map)

# Features (X) are weight and color; the target (y) is the encoded fruit label
X = data.loc[:, ['weight', 'color']]
y = data['fruit']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build the random forest and train it in one step (fit() returns the estimator)
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = clf.predict(X_test)

# Report accuracy, per-class metrics and the confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Classify a new fruit based on weight and color
def classify_fruit(weight, color):
    """Return the predicted fruit name for one sample.

    Uses the module-level classifier ``clf`` and the module-level
    ``fruit_map`` (fruit name -> integer label) to decode the prediction.

    Args:
        weight: Weight of the fruit, in the same units as the training data.
        color: Color code (1: Red, 2: Green, 3: Yellow).

    Returns:
        The fruit name whose encoded label the classifier predicted.
    """
    # The classifier was fitted on a DataFrame with named columns; passing a
    # bare nested list makes scikit-learn (1.0+) warn that X has no valid
    # feature names. A one-row frame with the training columns avoids that.
    sample = pd.DataFrame([[weight, color]], columns=['weight', 'color'])
    predicted_label = clf.predict(sample)[0]

    # Invert name -> label so the numeric prediction maps back to a name
    fruit_names = {label: name for name, label in fruit_map.items()}
    return fruit_names[predicted_label]

# Try the classifier on one example: a 160-unit fruit with color code 1 (Red)
weight, color = 160, 1
print("Predicted fruit:", classify_fruit(weight, color))


Task 2: Predict customer clothing size based on height and weight.

In [None]:

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Toy customer dataset, written one (height, weight, size) record per row
# (replace with your actual dataset).
# NOTE(review): this notebook section is headed KNN, but the cell below trains
# a random forest -- confirm which algorithm is intended.
data = pd.DataFrame(
    [
        (160, 50, 'S'),
        (170, 60, 'M'),
        (180, 70, 'L'),
        (155, 45, 'S'),
        (165, 55, 'M'),
        (175, 65, 'M'),
        (185, 75, 'L'),
        (150, 40, 'S'),
        (165, 55, 'M'),
        (175, 65, 'M'),
    ],
    columns=['height', 'weight', 'size'],
)

# Encode the clothing sizes as integer class labels (S=0, M=1, L=2)
size_map = {name: code for code, name in enumerate(['S', 'M', 'L'])}
data['size'] = data['size'].map(size_map)

# Features (X) are height and weight; the target (y) is the encoded size label
X = data.loc[:, ['height', 'weight']]
y = data['size']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build the random forest and train it in one step (fit() returns the estimator)
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = clf.predict(X_test)

# Report accuracy, per-class metrics and the confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Predict customer clothing size based on height and weight
def predict_size(height, weight):
    """Return the predicted clothing size for one customer.

    Uses the module-level classifier ``clf`` and the module-level
    ``size_map`` (size name -> integer label) to decode the prediction.

    Args:
        height: Customer height, in the same units as the training data.
        weight: Customer weight, in the same units as the training data.

    Returns:
        The size name ('S', 'M' or 'L') whose encoded label was predicted.
    """
    # The classifier was fitted on a DataFrame with named columns; passing a
    # bare nested list makes scikit-learn (1.0+) warn that X has no valid
    # feature names. A one-row frame with the training columns avoids that.
    sample = pd.DataFrame([[height, weight]], columns=['height', 'weight'])
    predicted_label = clf.predict(sample)[0]

    # Invert name -> label so the numeric prediction maps back to a name
    size_names = {label: name for name, label in size_map.items()}
    return size_names[predicted_label]

# Try the predictor on one example customer: height 170, weight 60
height, weight = 170, 60
print("Predicted size:", predict_size(height, weight))


Task 3: Determine optimal movie recommendation based on viewer preferences.

In [None]:

# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Toy movie catalogue, written one (title, genre, description) record per row
# (replace with your actual dataset)
movies = pd.DataFrame(
    [
        ('The Shawshank Redemption', 'Drama', 'Hope and redemption'),
        ('The Godfather', 'Crime', 'Mafia and crime'),
        ('The Dark Knight', 'Action', 'Superhero and villain'),
        ('12 Angry Men', 'Drama', 'Justice and morality'),
        ("Schindler's List", 'Biography', 'War and survival'),
    ],
    columns=['title', 'genre', 'description'],
)

# TF-IDF turns each description into a weighted term vector
vectorizer = TfidfVectorizer()

# Learn the vocabulary from the descriptions and vectorize them in one pass
movie_vectors = vectorizer.fit_transform(movies['description'])

# Pairwise dot products between all movie vectors. TfidfVectorizer L2-normalizes
# rows by default, so these dot products are cosine similarities.
similarity_matrix = linear_kernel(movie_vectors, movie_vectors)

# Function to get movie recommendations
def get_recommendations(title, num_recommendations=3):
    """Return the titles of the movies most similar to ``title``.

    Similarities are read from the module-level ``similarity_matrix``, whose
    rows and columns follow the row order of the module-level ``movies`` frame.

    Args:
        title: Exact title of the movie to base the recommendations on.
        num_recommendations: Maximum number of titles to return. Values below
            zero are treated as zero.

    Returns:
        A list of movie titles ordered from most to least similar, never
        including the queried movie itself.

    Raises:
        IndexError: If no movie in ``movies`` has the given title.
    """
    # Resolve the title to a row *position*: the similarity matrix is indexed
    # positionally, so index labels would be wrong for a non-default index.
    positions = (movies['title'] == title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"Movie title not found: {title!r}")
    movie_pos = positions[0]

    # Similarity of the queried movie to every movie in the catalogue
    scores = similarity_matrix[movie_pos]

    # Rank every *other* movie by similarity, highest first. The queried movie
    # is excluded explicitly rather than assumed to sort first, which fails
    # when another movie ties with it (e.g. an identical description).
    ranked = sorted(
        (pos for pos in range(len(scores)) if pos != movie_pos),
        key=lambda pos: scores[pos],
        reverse=True,
    )

    # Keep the top N and map the positions back to titles
    top = ranked[:max(num_recommendations, 0)]
    return [movies.iloc[pos]['title'] for pos in top]

# Try the recommender on one example title
title = 'The Shawshank Redemption'
print("Recommended movies:", get_recommendations(title), sep="\n")
