<a href="https://colab.research.google.com/github/praveen14402/AngulartodoListApp/blob/master/Online_Learning_Platform_User_Behavior_Analyzer3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import numpy as np
import pandas as pd
from faker import Faker
import random
from itertools import combinations
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import plotly.graph_objects as go
import plotly.express as px
import warnings
from tqdm import tqdm

# Suppress warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)

# Initialize Faker for generating realistic course names
fake = Faker()

def generate_learning_data(num_courses=10, num_users=100, num_interactions=500):
    # Generate course names
    courses = [fake.word() for _ in range(num_courses)]

    interactions = []
    for _ in range(num_interactions):
        user_id = random.randint(1, num_users)
        course = random.choice(courses)
        time_spent = random.uniform(0.5, 5.0)  # Time spent in hours
        completed = random.choice([0, 1])  # 0: Not Completed, 1: Completed

        interactions.append({
            'user_id': user_id,
            'course': course,
            'time_spent': time_spent,
            'completed': completed
        })

    df = pd.DataFrame(interactions)

    # Pivot the data to get users' interaction data
    df_encoded = df.pivot_table(
        index='user_id',
        columns='course',
        values=['time_spent', 'completed'],
        aggfunc={'time_spent': 'sum', 'completed': 'sum'},
        fill_value=0
    )

    # Flatten the MultiIndex columns
    df_encoded.columns = ['_'.join(col) for col in df_encoded.columns]

    return df_encoded

def simple_apriori(df, min_support=0.1, min_confidence=0.5):
    def support(item_set):
        return (df[list(item_set)].sum(axis=1) >= 1).mean()

    items = set(df.columns)
    item_sets = [frozenset([item]) for item in items]
    rules = []

    for k in range(2, len(items) + 1):
        item_sets = [s for s in combinations(items, k) if support(s) >= min_support]

        for item_set in item_sets:
            item_set = frozenset(item_set)
            for i in range(1, len(item_set)):
                for antecedent in combinations(item_set, i):
                    antecedent = frozenset(antecedent)
                    consequent = item_set - antecedent
                    confidence = support(item_set) / support(antecedent)
                    if confidence >= min_confidence:
                        lift = confidence / support(consequent)
                        rules.append({
                            'antecedents': ', '.join(antecedent),
                            'consequents': ', '.join(consequent),
                            'support': support(item_set),
                            'confidence': confidence,
                            'lift': lift
                        })

        if len(rules) >= 10:  # Stop if we have at least 10 rules
            break

    return pd.DataFrame(rules).sort_values('lift', ascending=False)

def perform_kmeans_with_progress(df, n_clusters=3, update_interval=5):
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42, max_iter=100)

    with tqdm(total=kmeans.max_iter, desc="K-means Clustering") as pbar:
        for i in range(kmeans.max_iter):
            kmeans.fit(df_scaled)
            pbar.update(1)
            if i % update_interval == 0:
                yield kmeans.labels_
            if kmeans.n_iter_ <= i + 1:
                break

    return kmeans.labels_

def visualize_apriori_rules(rules, top_n=10):
    top_rules = rules.head(top_n)

    fig = px.scatter_3d(
        top_rules, x="support", y="confidence", z="lift",
        color="lift", size="support",
        hover_name="antecedents", hover_data=["consequents"],
        labels={"support": "Support", "confidence": "Confidence", "lift": "Lift"},
        title=f"Top {top_n} Association Rules"
    )

    return fig

def visualize_kmeans_clusters(df, cluster_labels):
    pca = PCA(n_components=3)
    pca_result = pca.fit_transform(df)

    fig = px.scatter_3d(
        x=pca_result[:, 0], y=pca_result[:, 1], z=pca_result[:, 2],
        color=cluster_labels,
        labels={'color': 'Cluster'},
        title="User Clusters Visualization"
    )

    return fig

def main():
    print("Generating synthetic online learning platform data...")
    df_encoded = generate_learning_data(num_courses=10, num_users=100, num_interactions=500)
    print("Data generation complete.")
    print(f"Dataset shape: {df_encoded.shape}")

    print("Performing Apriori algorithm...")
    rules = simple_apriori(df_encoded, min_support=0.1, min_confidence=0.5)

    if not rules.empty:
        print(f"Apriori algorithm complete. Found {len(rules)} rules.")
        viz = visualize_apriori_rules(rules)
        viz.write_html("apriori_rules_3d.html")
        print("Apriori rules visualization saved as 'apriori_rules_3d.html'.")
    else:
        print("Apriori algorithm failed to generate rules.")

    print("Performing K-means clustering...")
    kmeans_generator = perform_kmeans_with_progress(df_encoded, n_clusters=3, update_interval=5)
    for i, labels in enumerate(kmeans_generator):
        print(f"K-means iteration {i*5}")
        viz = visualize_kmeans_clusters(df_encoded, labels)
        viz.write_html(f"user_clusters_3d_step_{i}.html")
        print(f"Intermediate visualization saved as 'user_clusters_3d_step_{i}.html'")

    final_labels = labels  # The last generated labels
    print("K-means clustering complete.")
    final_viz = visualize_kmeans_clusters(df_encoded, final_labels)
    final_viz.write_html("user_clusters_3d_final.html")
    print("Final user clusters visual saved as 'user_clusters_3d_final.html'.")

    print("Analysis complete.")

if __name__ == "__main__":
    main()


Generating synthetic online learning platform data...
Data generation complete.
Dataset shape: (99, 20)
Performing Apriori algorithm...
Apriori algorithm complete. Found 380 rules.
Apriori rules visualization saved as 'apriori_rules_3d.html'.
Performing K-means clustering...


K-means Clustering:   0%|          | 0/100 [00:00<?, ?it/s]

K-means iteration 0
Intermediate visualization saved as 'user_clusters_3d_step_0.html'










K-means iteration 5










K-means Clustering:   7%|▋         | 7/100 [00:00<00:04, 19.22it/s]


Intermediate visualization saved as 'user_clusters_3d_step_1.html'
K-means clustering complete.
Final user clusters visual saved as 'user_clusters_3d_final.html'.
Analysis complete.


[31mERROR: Could not find a version that satisfies the requirement itertools (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for itertools[0m[31m
[0m