In [14]:
from flask import Flask, request, jsonify, send_file
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import os

app = Flask(__name__)

# Load trained response analysis model (Train it if not done)
response_analyzer = joblib.load("C:/Users/Priti V/collegeproject/ML/models/student_response_analyzer.pkl")

# Ensure directory exists for reports
os.makedirs("static/reports", exist_ok=True)

@app.route("/analyze_response", methods=["POST"])
def analyze_response():
    data = request.json  # Expecting a JSON payload with 'student_id' and 'responses'
    student_id = data.get("student_id")
    responses = pd.DataFrame(data.get("responses"))
    
    # Check if correct answers
    responses['Correct'] = responses['Selected Answer'] == responses['Correct Answer']
    
    # Calculate skill performance
    skill_performance = responses.groupby('Skill Type')['Correct'].mean() * 100
    
    # Generate bar chart
    img_path = f"static/reports/student_{student_id}.png"
    skill_performance.plot(kind='bar', color=['blue', 'green', 'red'])
    plt.xlabel("Skills")
    plt.ylabel("Performance (%)")
    plt.title(f"Student {student_id} Performance Report")
    plt.ylim(0, 100)
    plt.savefig(img_path)  # Save image
    plt.close()
    
    return jsonify({"message": "Report generated!", "image_url": f"http://127.0.0.1:5002/{img_path}"})

# Serve static images
@app.route("/static/reports/<filename>")
def get_report_image(filename):
    return send_file(f"static/reports/{filename}", mimetype="image/png")

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5002, debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5002
 * Running on http://192.168.29.104:5002
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [16]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os

# Load student responses dataset
df = pd.read_csv("C:/Users/Priti V/collegeproject/ML/data/raw/student_responses.csv")

def analyze_student_performance(student_id):
    """
    Analyze student responses and generate a performance report.
    """
    # Filter data for the given student
    student_data = df[df["Student ID"] == student_id]

    if student_data.empty:
        print(f"No data found for Student ID: {student_id}")
        return

    # Check correctness of answers
    student_data["Correct"] = student_data["Selected Answer"] == student_data["Correct Answer"]

    # Calculate performance by skill type
    skill_performance = student_data.groupby("Skill Type")["Correct"].mean() * 100

    # Plot bar chart for visualization
    plt.figure(figsize=(8, 5))
    sns.barplot(x=skill_performance.index, y=skill_performance.values, palette="viridis")
    plt.xlabel("Skill Type")
    plt.ylabel("Accuracy (%)")
    plt.title(f"Student Performance Report (ID: {student_id})")
    plt.ylim(0, 100)
    plt.xticks(rotation=45)

    # Save the generated report
    os.makedirs("static/reports", exist_ok=True)
    report_path = f"static/reports/student_{student_id}_report.png"
    plt.savefig(report_path)
    plt.close()

    print(f"Report generated and saved at: {report_path}")
    return report_path

# Example usage:
# analyze_student_performance(student_id=101)


In [28]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NearestNeighbors

# Load survey data and recommendations dataset
math_df = pd.read_csv('C:/Users/Priti V/collegeproject/ML/data/survey_question/math_survey.csv')
chemistry_df = pd.read_csv('C:/Users/Priti V/collegeproject/ML/data/survey_question/chemistry_survey.csv')
physics_df = pd.read_csv('C:/Users/Priti V/collegeproject/ML/data/survey_question/physics_survey.csv')
topics_df = pd.read_csv('C:/Users/Priti V/collegeproject/ML/data/recommendation_data/topic_recommendation.csv')

# Initialize encoders
encoder_dict = {}
topic_encoder = LabelEncoder()

# Function to encode categorical columns
def encode_df(df):
    for column in df.columns:
        if df[column].dtype == 'object':
            # Collect unique values from all datasets for this column
            unique_values = np.array([])
            for dataset in [math_df, chemistry_df, physics_df, topics_df]:
                if column in dataset.columns:
                    unique_values = np.unique(np.concatenate((unique_values, dataset[column].astype(str).unique())))
            # Create encoder if it doesn't exist
            if column not in encoder_dict:
                encoder_dict[column] = LabelEncoder()
                encoder_dict[column].fit(unique_values)
            # Encode the column (handle unknown values)
            df[column] = df[column].apply(
                lambda x: encoder_dict[column].transform([str(x)])[0] 
                if str(x) in encoder_dict[column].classes_ 
                else -1  # Assign -1 for unknown values
            )
    return df

# Encode all datasets (including topics_df)
math_df = encode_df(math_df)
chemistry_df = encode_df(chemistry_df)
physics_df = encode_df(physics_df)
topics_df = encode_df(topics_df)

# Encode the target variable ('Recommended_Topic') separately
topics_df['Recommended_Topic'] = topic_encoder.fit_transform(topics_df['Recommended_Topic'])

# Combine all datasets
combined_df = pd.concat([math_df, chemistry_df, physics_df, topics_df], axis=0)

# Handle missing values (use mode for categorical data)
# Handle missing values and cast to integer
combined_df.fillna(combined_df.mode().iloc[0], inplace=True)
combined_df['Recommended_Topic'] = combined_df['Recommended_Topic'].astype(int)  # Add this line



# Encode the target variable ('Recommended_Topic') as integer
topics_df['Recommended_Topic'] = topic_encoder.fit_transform(topics_df['Recommended_Topic'].astype(str))


# Verify that all columns are numeric
print("Data types after encoding:\n", combined_df.dtypes)
if not all(np.issubdtype(dtype, np.number) for dtype in combined_df.dtypes):
    print("\nError: Non-numeric columns found:", combined_df.select_dtypes(exclude=[np.number]).columns)
else:
    print("\nAll data encoded correctly!")

# Train KNN model
knn = NearestNeighbors(n_neighbors=5, metric='euclidean')
X = combined_df.drop(columns=['Recommended_Topic']).values  # Exclude the target column
knn.fit(X)

# Recommendation function
def recommend_topics(new_response):
    if len(new_response) != X.shape[1]:
        raise ValueError(f"New response must have {X.shape[1]} features.")
    
    encoded_response = []
    for val, col in zip(new_response, combined_df.columns[:-1]):  # Exclude 'Recommended_Topic'
        if col in encoder_dict:
            # Handle unknown values gracefully
            encoded_val = (
                encoder_dict[col].transform([str(val)])[0] 
                if str(val) in encoder_dict[col].classes_ 
                else -1
            )
            encoded_response.append(encoded_val)
        else:
            encoded_response.append(val)
    
    distances, indices = knn.kneighbors([encoded_response])
    recommended_encoded_topics = combined_df.iloc[indices[0]]['Recommended_Topic']
    recommended_topics = topic_encoder.inverse_transform(recommended_encoded_topics)
    
    print("Recommended Topics:")
    for i, topic in enumerate(recommended_topics, 1):
        print(f"{i}. {topic}")

# Example usage
new_response = ["Like", "Medium", "Yes", "No", "Like", "High", "Yes", "Yes"]
recommend_topics(new_response)


Data types after encoding:
 Question_Type_Preference         int64
Most_Challenging_Topic           int64
Problem_Solving_Speed          float64
Error_Analysis                   int64
Confidence_in_Math             float64
Confidence_in_Concepts         float64
Memorization_vs_Application    float64
Time_Management                float64
Recommended_Topic                int64
dtype: object

All data encoded correctly!
Recommended Topics:
1. 4
2. 4
3. 8
4. 5
5. 2


Updated response

In [31]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NearestNeighbors

# Load survey data and recommendations dataset
math_df = pd.read_csv('C:/Users/Priti V/collegeproject/ML/data/survey_question/math_survey.csv')
chemistry_df = pd.read_csv('C:/Users/Priti V/collegeproject/ML/data/survey_question/chemistry_survey.csv')
physics_df = pd.read_csv('C:/Users/Priti V/collegeproject/ML/data/survey_question/physics_survey.csv')
topics_df = pd.read_csv('C:/Users/Priti V/collegeproject/ML/data/recommendation_data/topic_recommendation.csv')

# Initialize encoders
encoder_dict = {}
topic_encoder = LabelEncoder()

# Function to encode categorical columns
def encode_df(df):
    for column in df.columns:
        if df[column].dtype == 'object':
            # Collect unique values from all datasets for this column
            unique_values = np.array([])
            for dataset in [math_df, chemistry_df, physics_df, topics_df]:
                if column in dataset.columns:
                    unique_values = np.unique(np.concatenate((unique_values, dataset[column].astype(str).unique())))
            # Create encoder if it doesn't exist
            if column not in encoder_dict:
                encoder_dict[column] = LabelEncoder()
                encoder_dict[column].fit(unique_values)
            # Encode the column (handle unknown values)
            df[column] = df[column].apply(
                lambda x: encoder_dict[column].transform([str(x)])[0] 
                if str(x) in encoder_dict[column].classes_ 
                else -1  # Assign -1 for unknown values
            )
    return df

# Encode all datasets (including topics_df)
math_df = encode_df(math_df)
chemistry_df = encode_df(chemistry_df)
physics_df = encode_df(physics_df)

# Encode the target variable ('Recommended_Topic') separately
topics_df['Recommended_Topic'] = topic_encoder.fit_transform(topics_df['Recommended_Topic'])

# Combine all datasets
combined_df = pd.concat([math_df, chemistry_df, physics_df, topics_df], axis=0)

# Ensure that all encoded columns are numeric
for column in combined_df.columns:
    combined_df[column] = pd.to_numeric(combined_df[column], errors='coerce').fillna(-1)

# Check if all columns are numeric
print("Data types after conversion:\n", combined_df.dtypes)
if not all(np.issubdtype(dtype, np.number) for dtype in combined_df.dtypes):
    print("\nError: Non-numeric columns found:", combined_df.select_dtypes(exclude=[np.number]).columns)
else:
    print("\nAll data encoded correctly!")

# Train KNN model
knn = NearestNeighbors(n_neighbors=5, metric='euclidean')
X = combined_df.drop(columns=['Recommended_Topic']).values  # Exclude the target column

try:
    knn.fit(X)
    print("KNN model trained successfully!")
except ValueError as e:
    print(f"Error during KNN fitting: {e}")

# Recommendation function
def recommend_topics(new_response):
    if len(new_response) != X.shape[1]:
        raise ValueError(f"New response must have {X.shape[1]} features.")
    
    encoded_response = []
    for val, col in zip(new_response, combined_df.columns[:-1]):  # Exclude 'Recommended_Topic'
        if col in encoder_dict:
            # Handle unknown values gracefully
            encoded_val = (
                encoder_dict[col].transform([str(val)])[0] 
                if str(val) in encoder_dict[col].classes_ 
                else -1
            )
            encoded_response.append(encoded_val)
        else:
            encoded_response.append(val)
    
    distances, indices = knn.kneighbors([encoded_response])
    recommended_encoded_topics = combined_df.iloc[indices[0]]['Recommended_Topic']
    recommended_topics = topic_encoder.inverse_transform(recommended_encoded_topics.astype(int))  # Convert to int before inverse_transform
    
    print("Recommended Topics:")
    for i, topic in enumerate(recommended_topics, 1):
        print(f"{i}. {topic}")

# Example usage
new_response = ["Like", "Medium", "Yes", "No", "Like", "High", "Yes", "Yes"]
recommend_topics(new_response)


Data types after conversion:
 Question_Type_Preference       float64
Most_Challenging_Topic         float64
Problem_Solving_Speed          float64
Error_Analysis                   int64
Confidence_in_Math             float64
Confidence_in_Concepts         float64
Memorization_vs_Application    float64
Time_Management                float64
Recommended_Topic              float64
dtype: object

All data encoded correctly!
KNN model trained successfully!
Recommended Topics:
1. Organic Chemistry
2. Geometry
3. Algebra
4. Classical Mechanics
5. Calculus
