In [71]:
import plotly.express as px
import pandas as pd

In [72]:
# Load the course / label matching results.
# "answer" is read as str on purpose: the "0"/"1" filter below compares
# strings, so it must not depend on pandas' dtype inference for that column.
df = pd.read_csv("../XP/bok-graph-result.csv", dtype={"answer": str})

# Number of distinct courses (column "s") in the raw file, before any filtering.
total_courses = len(df["s"].unique())
print(total_courses, " unique courses")
print(len(df["label"].unique()), " unique labels")

# Keep only the rows whose answer is exactly "0" or "1".
# .copy() makes the filtered frame independent of the raw one, so the column
# assignments below do not raise SettingWithCopyWarning.
df = df[df["answer"].isin(["0", "1"])].copy()
df["answer"] = df["answer"].astype(int)
df["score"] = df["score"].astype(float)

# Minimum score for a (course, ku) row to count as a match.
SCORE_THRESHOLD = 0.7

# A row is a match when its score reaches the threshold and its answer is 1.
matched_courses = df[(df["score"] >= SCORE_THRESHOLD) & (df["answer"] == 1)]
matched_courses

94  unique courses
92  unique labels


Unnamed: 0,s,label,ku_source,score,parcours,ka,ku,answer,explain
9,http://example.org/course/X1II030,Complexité et algorithmes,file:///Users/molli-p/BodyOfKnowledge/Algorith...,0.726731,"ATAL,ORO,DS,CMI,ALMA,VICO",Algorithmic_Foundations_AL,AL-Complexity_Complexity,1,The lecture covers most of the topics in the K...
10,http://example.org/course/X1II030,Complexité et algorithmes,file:///Users/molli-p/BodyOfKnowledge/Algorith...,0.707807,"ATAL,ORO,DS,CMI,ALMA,VICO",Algorithmic_Foundations_AL,AL-Complexity_Complexity,1,The lecture covers most of the topics in the K...
11,http://example.org/course/X1II030,Complexité et algorithmes,file:///Users/molli-p/BodyOfKnowledge/Algorith...,0.703667,"ATAL,ORO,DS,CMI,ALMA,VICO",Algorithmic_Foundations_AL,AL-Complexity_Complexity,1,The lecture covers most of the topics in the K...
38,http://example.org/course/X2II010,Machine Learning,file:///Users/molli-p/BodyOfKnowledge/Artifici...,0.756642,"ATAL,ORO,DS,CMI,ALMA,VICO",Artificial_Intelligence_AI,AI-ML_Machine_Learning,1,The lecture covers the main topics of supervis...
39,http://example.org/course/X2II010,Machine Learning,file:///Users/molli-p/BodyOfKnowledge/Artifici...,0.718221,"ATAL,ORO,DS,CMI,ALMA,VICO",Artificial_Intelligence_AI,AI-ML_Machine_Learning,1,The lecture covers the main topics of supervis...
...,...,...,...,...,...,...,...,...,...
274,http://example.org/course/ZZTASPM,Text and sequential pattern mining,file:///Users/molli-p/BodyOfKnowledge/Artifici...,0.728557,DS,Artificial_Intelligence_AI,AI-NLP_Natural_Language_Processing,1,The lecture covers the topics of sequential pa...
275,http://example.org/course/ZZTASPM,Text and sequential pattern mining,file:///Users/molli-p/BodyOfKnowledge/Data_Man...,0.724701,DS,Data_Management,Data_Management,1,"The lecture covers Sequential pattern mining, ..."
276,http://example.org/course/ZZTASPM,Text and sequential pattern mining,file:///Users/molli-p/BodyOfKnowledge/Artifici...,0.710875,DS,Artificial_Intelligence_AI,AI-ML_Machine_Learning,1,The lecture covers the main topics of machine ...
277,http://example.org/course/ZZVA,Visual analytics,file:///Users/molli-p/BodyOfKnowledge/Human_Co...,0.763781,DS,Human_Computer_Interaction_HCI,HCI-Evaluation_Evaluating_the_Design,1,The lecture covers core topics of the Knowledg...


In [81]:
# For every "ka" value among the matched rows, count how many distinct
# courses (column "s") it covers; the result has the columns "ka" and "count"
# and is ordered by ka label.
ka_count = (
    matched_courses
    .groupby("ka")["s"]
    .nunique()
    .rename("count")
    .reset_index()
    .sort_values(by="ka")
)
ka_count

Unnamed: 0,ka,count
0,Algorithmic_Foundations_AL,6
1,Architecture_and_Organization_AR,1
2,Artificial_Intelligence_AI,13
3,Data_Management,2
4,Foundations_Of_Programming_Languages,1
5,Graphics_and_Interactive_Techniques_GIT,1
6,Human_Computer_Interaction_HCI,2
7,Mathematical_and_Statistical_Foundations_MSF,6
8,Networking_and_Communication_NC,4
9,Society_Ethics_and_the_Profession_SEP,1


In [None]:
# Closed polar line chart of ka_count: one angular position per "ka" label,
# radius = the matching "count" value.
radial_values = list(ka_count["count"])
angular_labels = list(ka_count["ka"])

fig = px.line_polar(r=radial_values, theta=angular_labels, line_close=True)
fig.show()

In [86]:
# Keep the matched rows whose "parcours" column contains "ATAL".
# regex=False: "ATAL" is a literal code, not a pattern.
# na=False: rows with a missing parcours are excluded instead of yielding a
# NaN mask entry, which would make the boolean indexing fail.
ATAL_courses = matched_courses[
    matched_courses["parcours"].str.contains("ATAL", regex=False, na=False)
]

# Distinct count of courses (column "s") for each "ka" among the ATAL rows.
ka_count = ATAL_courses.groupby("ka")["s"].nunique()
ka_count = ka_count.reset_index()
ka_count.columns = ["ka", "count"]

# Order the rows by ka label so the angular axis has a stable order.
ka_count = ka_count.sort_values(by="ka")

# Closed, spline-smoothed polar line chart of the ATAL counts.
fig = px.line_polar(
    r=list(ka_count["count"]),
    theta=list(ka_count["ka"]),
    line_close=True,
    line_shape="spline",
)
fig.show()

In [119]:
# Compare several learning paths on one polar chart: one coloured, closed line
# per path, radius = number of distinct matched courses (column "s") per "ka".
learning_path = ["ALMA", "DS", "ATAL", "RO"]
colors = px.colors.qualitative.Set1

# Shared angular axis: every "ka" label present in matched_courses, sorted.
# All paths are reindexed on it so a "ka" with no course for a path is drawn
# as 0 instead of being skipped, which keeps the traces comparable.
all_ka = sorted(matched_courses["ka"].dropna().unique())

per_path_counts = []
for path in learning_path:
    # Literal substring test on the comma-separated "parcours" column;
    # na=False excludes rows with a missing parcours.
    # NOTE(review): being a substring test, "RO" also matches the "ORO"
    # entries seen in parcours - confirm which code is intended.
    in_path = matched_courses["parcours"].str.contains(path, regex=False, na=False)
    counts = (
        matched_courses[in_path]
        .groupby("ka")["s"]
        .nunique()
        .reindex(all_ka, fill_value=0)
        .rename("count")
        .rename_axis("ka")
        .reset_index()
        .assign(parcours=path)
    )
    per_path_counts.append(counts)

# Long-form table with the columns "ka", "count" and "parcours" (the path name).
ka_count = pd.concat(per_path_counts, ignore_index=True)

# A single px call builds one named, coloured trace per path with a legend
# entry, so no empty placeholder trace or manual colour indexing is needed.
fig = px.line_polar(
    ka_count,
    r="count",
    theta="ka",
    color="parcours",
    line_close=True,
    category_orders={"ka": all_ka, "parcours": learning_path},
    color_discrete_sequence=colors,
)

fig.show()