In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import random
import operator
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import json
import ipywidgets as widgets
import functools

%matplotlib inline
# Read the merged grades table and discard, in one step, the columns that the
# rest of the notebook never looks at.
df = pd.read_csv("./data/notas_merged.csv").drop(
    columns=["NUMORD", "SUBTIPO", "CENTRO", "PLAN", "NOTA_BASE_4"]
)
# Columns that are treated as categories throughout the notebook.
categorical_columns = ["CURSO","ANOCON","MUNICIPIO","PROVINCIA","DESPLAN","GENERO"]

# Palette that the plot generators draw random colours from.
COLORS = ["red","blue","yellow","pink","purple","green"]

# Sentence templates used by generate_description, which emits
#     <prefix> + count + <suffix> + category + ". "
# Every prefix must therefore end with a space and every suffix must both
# start and end with one, otherwise the words run into the inserted values.
DESCRIPTIONS = {
    "CURSO": ["There are ", " students from the year "],
    "ANOCON": ["There are ", " students who graduated in "],
    "MUNICIPIO": ["There are ", " students from the municipality of "],
    "PROVINCIA": ["There are ", " students from the province of "],
    "DESPLAN": ["There are ", " students of the plan "],
    "GENERO": ["There are ", " students of gender "]
}

    
# Cast every categorical column to the pandas "category" dtype, then record
# how many rows fall into each category, keyed by column name.
df = df.astype({column: "category" for column in categorical_columns})
category_counts = {
    column: dict(df[column].value_counts()) for column in categorical_columns
}

# Keep the rows ordered by academic year.
df = df.sort_values(by="CURSO")

In [12]:
#At this point we have  df, category_counts
def get_most_frequents(category_counts):
    """Return, for every column, its single most frequent category.

    Parameters
    ----------
    category_counts : dict
        Maps a column name to a ``{category: count}`` dict.

    Returns
    -------
    dict
        Maps each column name to a one-entry ``{category: count}`` dict holding
        the category with the highest count. On ties the category that comes
        first in iteration order wins. A column with no categories maps to an
        empty dict instead of raising.
    """
    most_frequent = {}
    for column, counts in category_counts.items():
        if not counts:
            # max() would raise ValueError on an empty mapping.
            most_frequent[column] = {}
            continue
        top_category = max(counts, key=counts.get)
        most_frequent[column] = {top_category: counts[top_category]}
    return most_frequent
    

def generate_grade_plot(X, title=None):
    """Describe a grade histogram as a plain dict (nothing is drawn here).

    Parameters
    ----------
    X : mapping or DataFrame
        Anything indexable with ``"NOTA_BASE_10"``; that value is the data to plot.
    title : str, optional
        Text used both as the plot title and as the legend label.

    Returns
    -------
    dict
        Keys ``"title"``, ``"arg"``, ``"color"`` and ``"legend"``. All four are
        always present because givemefigures reads ``"title"``, ``"color"`` and
        ``"legend"`` unconditionally. The colour is drawn at random from COLORS.
    """
    return {
        "title": title,
        "arg": X["NOTA_BASE_10"],
        "color": random.choice(COLORS),
        "legend": title,
    }

def generate_grade_plots_2(X,c):
    """Build one grade-plot description per category of column ``c``.

    The categories come from the module-level ``category_counts[c]``; for each
    one, the rows of ``X`` whose ``c`` value equals that category are handed to
    generate_grade_plot. Returns ``{category: plot description}``.
    """
    return {
        category: generate_grade_plot(X[X[c] == category])
        for category in category_counts[c]
    }

def generate_grade_plots(X):
    """Collect the per-category grade plots of every categorical column.

    Returns a dict keyed by each name in ``categorical_columns`` whose value is
    whatever generate_grade_plots_2 produces for that column.
    """
    return {column: generate_grade_plots_2(X, column) for column in categorical_columns}

def generate_category_count_pie(c,title=None):
    """Describe a pie chart of the category counts of column ``c``.

    Returns a dict with the given ``title``, the counts (``"arg"``), the
    category names (``"labels"``) and two colours (``"colors"``) drawn at
    random from COLORS. Nothing is drawn here.
    """
    counts = category_counts[c]
    return {
        "title": title,
        "arg": list(counts.values()),
        "labels": list(counts.keys()),
        # Exactly two random picks, independent of how many categories exist.
        "colors": [random.choice(COLORS) for _ in range(2)],
    }

def generate_category_count_bar(c):
    """Draw a bar chart of the category counts of column ``c``.

    Opens a new pyplot figure, draws one centred bar per category in a single
    randomly chosen colour, labels the ticks with the category names and
    returns the figure.
    """
    fig = plt.figure()
    counts = category_counts[c]
    positions = range(len(counts))
    heights = list(counts.values())
    plt.bar(positions, heights, align='center', color=random.choice(COLORS))
    plt.xticks(range(len(counts)), list(counts.keys()))
    return fig

def generate_category_counts_bar():
    """Build one chart description per categorical column.

    Returns a dict keyed by each name in ``categorical_columns``.

    NOTE(review): despite the name, the values come from
    generate_category_count_pie, not generate_category_count_bar - confirm
    which one is intended before relying on this.
    """
    return {column: generate_category_count_pie(column) for column in categorical_columns}
 
def generate_description(category_counts):
    """Render the category counts as a run of English sentences.

    For every column in ``categorical_columns`` and every category of that
    column, one sentence is produced from the column's DESCRIPTIONS template:
    prefix, count, suffix, category name, then ``". "``. The sentences are
    concatenated in that order and returned as a single string.
    """
    return "".join(
        DESCRIPTIONS[column][0] + str(count) + DESCRIPTIONS[column][1] + str(category) + ". "
        for column in categorical_columns
        for category, count in category_counts[column].items()
    )

In [13]:
# For students and teachers, compare the profile's course against all the other courses. For students, also report their own grade.
def generate_student_report(profile, X):
    """Assemble the three plot descriptions shown to a student.

    ``profile`` must provide ``"curso"`` (the student's course) and ``"nota"``
    (the student's grade, as a string). ``X`` is split into the rows of that
    course and the rows of every other course.

    Returns a three-item list: a grade plot for the student's course, a grade
    plot for the other courses, and a pie of the CURSO distribution. Each item
    is picked at random among four candidates that differ only in their
    randomly drawn colours.
    """
    course_column = "CURSO"
    own_course = profile["curso"]

    X_course = X[X[course_column] == own_course]
    X_other = X[X[course_column] != own_course]

    own_course_candidates = [
        generate_grade_plot(X_course, "The median for all courses is ") for _ in range(4)
    ]
    other_courses_candidates = [generate_grade_plot(X_other, "") for _ in range(4)]
    distribution_candidates = [generate_category_count_pie(course_column, "") for _ in range(4)]

    # NOTE(review): the description variants below are built but never returned.
    grade = profile["nota"]
    median_course = str(np.median(X_course["NOTA_BASE_10"]))
    median_other = str(np.median(X_other["NOTA_BASE_10"]))
    dn1 = "Your grade is " + grade + ". The median grade of your course is " + median_course + " The median grade for other courses is around" + median_other
    dn2 = "Your grade is " + grade + ". The median grade of your course is " + median_course
    dn3 = "Your grade is " + grade
    dn4 = "The median grade of your course is " + median_course + " The median grade for other courses is around" + median_other

    return [
        random.choice(own_course_candidates),
        random.choice(other_courses_candidates),
        random.choice(distribution_candidates),
    ]
    
def generate_teacher_report(profile, X):
    """Assemble the three plot descriptions shown to a teacher.

    ``profile`` must provide ``"curso"`` (the teacher's course). ``X`` is split
    into the rows of that course and the rows of every other course.

    Returns a three-item list: a grade plot for the teacher's course, a grade
    plot for the other courses, and a pie of the CURSO distribution. Each item
    is picked at random among four candidates that differ only in their
    randomly drawn colours.
    """
    c = "CURSO"
    course = profile["curso"]

    X_course = X[X[c] == course]
    X_other = X[X[c] != course]

    plots_gradecmp_courses = [generate_grade_plot(X_course, "your course's grade distribution") for _ in range(4)]
    # This text is rendered as the plot's title and legend label.
    plots_gradecmp_all = [generate_grade_plot(X_other, "all courses grade distribution") for _ in range(4)]
    plots_dist_all = [generate_category_count_pie(c) for _ in range(4)]

    # NOTE(review): the description variants below are built but never returned.
    median_course = str(np.median(X_course["NOTA_BASE_10"]))
    median_other = str(np.median(X_other["NOTA_BASE_10"]))
    dn1 = "The median grade of your course is " + median_course + " The median grade for other courses is around" + median_other
    dn2 = "The median grade of your course is " + median_course
    dn4 = "The median grade of your course is " + median_course + " The median grade for other courses is around" + median_other

    return [random.choice(plots_gradecmp_courses), random.choice(plots_gradecmp_all), random.choice(plots_dist_all)]

def generate_delegate_report(profile, X):
    """Assemble the candidate plots for the gender delegate.

    ``X`` is split on the GENERO column into rows equal to ``"H"`` (labelled
    "male") and all remaining rows (labelled "female"). ``profile`` is accepted
    but not read.

    Returns three lists of four candidates each, in this order: female grade
    plots, male grade plots, gender-distribution pies.
    """
    is_male = X["GENERO"] == "H"
    X_h = X[is_male]
    X_m = X[X["GENERO"] != "H"]

    # NOTE(review): the description variants below are built but never returned.
    median_female = str(np.median(X_m["NOTA_BASE_10"]))
    median_male = str(np.median(X_h["NOTA_BASE_10"]))
    dn1 = "The median grade of the female students is" + median_female + ".\n The median grade for male students is " + median_male + "."
    dn2 = "The median grade of the female students is" + median_female
    dn3 = " The median grade for male students is " + median_male

    female_candidates = [generate_grade_plot(X_m, "female") for _ in range(4)]
    male_candidates = [generate_grade_plot(X_h, "male") for _ in range(4)]
    distribution_candidates = [
        generate_category_count_pie("GENERO", "Gender distribution") for _ in range(4)
    ]

    return [female_candidates, male_candidates, distribution_candidates]


In [14]:
# Example personas for the report generators. Each one is an ordered list of
# (field, value) pairs turned into a dict.
student_profile = dict([
    ("age", 20),
    ("class", "student"),
    ("gender", "female"),
    ("nota", "7.5"),
    ("description", "wants to know how its own grade (given) compares to the rest of the students in its course (given)"),
    ("curso", "2003-04"),
])
teacher_profile = dict([
    ("age", 48),
    ("class", "teacher"),
    ("gender", "female"),
    ("description", ""),
    ("curso", "2003-04"),
])
gender_delegate = dict([
    ("age", 38),
    ("class", "manager"),
    ("description", "wants to know how students of different gender have been performing over the years"),
    ("gender", "male"),
])

In [15]:
def generate_grade_plot(X, title=None):
    """Describe a grade histogram as a plain dict (nothing is drawn here).

    ``X["NOTA_BASE_10"]`` is the data to plot; ``title`` doubles as the legend
    label; the colour is drawn at random from COLORS.
    """
    grades = X["NOTA_BASE_10"]
    colour = random.choice(COLORS)
    return {"title": title, "arg": grades, "color": colour, "legend": title}

def generate_category_count_pie(c,title=None):
    """Describe a pie chart of how the rows spread over the categories of ``c``.

    The wedge sizes and labels are read from the module-level
    ``category_counts[c]``; two colours are drawn at random from COLORS.
    Returns a dict with keys ``"title"``, ``"arg"``, ``"labels"``, ``"colors"``.
    """
    pie = {"title": title}
    pie["arg"] = list(category_counts[c].values())
    pie["labels"] = list(category_counts[c].keys())
    first_colour = random.choice(COLORS)
    second_colour = random.choice(COLORS)
    pie["colors"] = [first_colour, second_colour]
    return pie

def givemefigures(hist , pie, mode):
    """Draw two plot descriptions side by side and return the figure.

    Parameters
    ----------
    hist : dict
        Histogram description with keys ``"title"``, ``"arg"``, ``"color"`` and
        ``"legend"``; drawn on the left.
    pie : dict
        Description of the right-hand plot. In ``"histpie"`` mode it needs
        ``"title"``, ``"arg"``, ``"labels"`` and ``"colors"``; in ``"histhist"``
        mode it is a second histogram description with the same keys as ``hist``.
    mode : str
        ``"histpie"`` or ``"histhist"``.

    Returns
    -------
    The matplotlib figure holding both axes.

    Raises
    ------
    ValueError
        If ``mode`` is neither of the two supported values.
    """
    # Validate first so that an unknown mode neither opens a figure nor fails
    # later on an unbound local.
    if mode not in ("histpie", "histhist"):
        raise ValueError("mode must be 'histpie' or 'histhist', got %r" % (mode,))

    fig, (ax1, ax2) = plt.subplots(1, 2)
    # plt.title acts on the current axes, not on the figure as a whole.
    # A missing (None) title is treated as empty text instead of raising.
    plt.title((hist["title"] or "") + (pie["title"] or ""))

    ax1.hist(hist["arg"], color=hist["color"])
    ax1.legend(handles=[mpatches.Patch(color=hist["color"], label=hist["legend"])])

    if mode == "histpie":
        ax2.pie(pie["arg"], labels=pie["labels"], autopct=None, colors=pie["colors"])
    else:
        ax2.hist(pie["arg"], color=pie["color"])
        # Only `color` is passed: it overrides facecolor/edgecolor anyway, and
        # passing all three makes matplotlib emit a warning.
        ax2.legend(handles=[mpatches.Patch(color=pie["color"], label=pie["legend"])])

    return fig

# Candidate plots for the gender delegate: [female plots, male plots, gender pies].
repo_delegate = generate_delegate_report(gender_delegate, df)
# Example: givemefigures(repo_delegate[0][0], repo_delegate[2][0], "histpie") draws one pairing.

def create_reports(repo_someone):
    """Pair up the candidate plots of a report into figure specifications.

    ``repo_someone`` holds three lists: two lists of histogram descriptions
    (indices 0 and 1) and one list of pie descriptions (index 2).

    Returns a list of ``(left, right, mode)`` tuples:
    - every histogram of list 1 with a random pie, mode ``"histpie"``;
    - then, for every histogram of list 0, one pairing with a random pie
      (``"histpie"``) followed by one with a random histogram of list 1
      (``"histhist"``).
    """
    first_hists = repo_someone[0]
    second_hists = repo_someone[1]

    pairings = [
        (plot, random.choice(repo_someone[2]), "histpie") for plot in second_hists
    ]
    for plot in first_hists:
        pairings.append((plot, random.choice(repo_someone[2]), "histpie"))
        pairings.append((plot, random.choice(second_hists), "histhist"))
    return pairings
# Module-level list of (left plot, right plot, mode) pairings built from the delegate report.
a = create_reports(repo_delegate)

In [20]:
def write_png(reports):
    """Render every pairing in ``reports`` and save it as ``<index>.png``.

    Parameters
    ----------
    reports : sequence
        ``(left, right, mode)`` entries as produced by create_reports; each one
        is passed to givemefigures.

    The files are written to the current working directory and numbered by
    position in ``reports``. Each figure is closed as soon as it is saved, so
    only one is open at a time.
    """
    # Iterate over the argument itself: the length of an unrelated notebook
    # global must not decide how many figures are written.
    for index, report in enumerate(reports):
        fig = givemefigures(report[0], report[1], report[2])
        fig.savefig(str(index) + ".png")
        plt.close(fig)

# Rebuild the delegate report and write one PNG per figure pairing.
repo_delegate = generate_delegate_report(gender_delegate, df)
write_png(create_reports(repo_delegate))



In [21]:
# Button index (as text) -> id of the image currently shown next to that button.
displayed = {"0": "2", "1": "1"}

# Pairing schedule: each key 1..8 maps to the list of lower indices 0..key-1
# that it still has to be compared against.
tourney_scorer = {challenger: list(range(challenger)) for challenger in range(1, 9)}

# Key of tourney_scorer that pairs are currently being drawn from.
next_index = 1

def pick_one(i):
    """Button callback: credit the image next to the clicked button, then move on.

    Parameters
    ----------
    i : ipywidgets Button
        The clicked button; the last character of its description (e.g. the
        ``0`` in ``'Image 0'``) is the button's index.

    The index is translated into an image id through the module-level
    ``displayed`` mapping, that image's tally in ``results`` is incremented,
    the tallies are written to ``result.json``, and looparound() is called to
    show the next pair.
    """
    global results

    button_index = i.description[-1]
    image_id = str(displayed[str(button_index)])
    # An image with no tally yet starts from zero instead of raising KeyError.
    results[image_id] = float(results.get(image_id, 0)) + 1
    with open('result.json', 'w') as fp:
        json.dump(results, fp)
    looparound()
    
def load_2():
    """Return the next pair of image indices to compare, as two strings.

    Pairs are drawn from the module-level ``tourney_scorer`` schedule: the
    first remaining opponent of ``next_index`` is removed from the schedule and
    returned together with ``next_index``. When the current entry is used up,
    ``next_index`` advances to the next entry that still has opponents.

    Returns
    -------
    list of str
        ``[first, second]``, or ``["-1", "-1"]`` (after printing ``Done!``)
        once the schedule is exhausted. Calling again after that keeps
        returning the sentinel.
    """
    global next_index

    # Skip entries with no opponents left, without running past the schedule.
    while next_index in tourney_scorer and not tourney_scorer[next_index]:
        next_index += 1

    if next_index not in tourney_scorer:
        print("Done!")
        return ["-1", "-1"]

    first = tourney_scorer[next_index].pop(0)
    return [str(first), str(next_index)]
    


In [22]:
from IPython.display import display
from ipywidgets import Image
from IPython.display import display, Javascript
from IPython import display as display_p
import os

results = {}
def looparound():
    """Show the next pair of images side by side, each above a vote button.

    Steps:
    1. Clear the previous cell output.
    2. Reload the tallies from ``result.json`` when that file exists; otherwise
       start twelve tallies (keys ``"0"``..``"11"``) at zero.
    3. Ask load_2() for the next pair; stop when it returns the
       ``["-1", "-1"]`` sentinel.
    4. Record in the module-level ``displayed`` mapping which image sits next
       to which button, then display both images with their buttons. Clicking
       a button calls pick_one.
    """
    global results, displayed
    display_p.clear_output(wait=True)

    if os.path.isfile('result.json'):
        with open('result.json') as f:
            results = dict(json.load(f))
    else:
        results = dict((str(i), 0) for i in range(0, 12))

    first, second = load_2()
    if [first, second] == ["-1", "-1"]:
        return

    # pick_one looks the clicked image up by the last character of the button
    # caption ('Image 0' / 'Image 1'). The keys therefore have to be those
    # button indices, and the mapping has to be the module-level one: a local
    # dict would never be seen by pick_one.
    displayed = {
        "0": str(first),
        "1": str(second)
    }

    COLS = 2
    ROWS = 1
    IMG_WIDTH = 300
    IMG_HEIGHT = 400

    # Read both PNGs, closing each file as soon as its bytes are loaded.
    images = []
    for image_id in (first, second):
        with open(str(image_id) + ".png", 'rb') as image_file:
            images.append(image_file.read())

    rows = []
    for row in range(ROWS):
        cols = []
        for col in range(COLS):
            index = row * COLS + col
            image = widgets.Image(
                value=images[index], width=IMG_WIDTH, height=IMG_HEIGHT
            )
            button = widgets.Button(description='Image %d' % index)
            button.on_click(pick_one)

            # Vertical box: image above its button.
            cols.append(widgets.VBox([image, button]))

        # Horizontal box grouping the columns of this row.
        rows.append(widgets.HBox(cols))

    # Vertical box grouping all the rows.
    display(widgets.VBox(rows))
# Show the first pair of images; each button click goes through pick_one, which calls looparound() again.
looparound()

VBox(children=(HBox(children=(VBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\xb0…

In [19]:
# Display the current tallies (image index as text -> number of times it was picked).
results

{'0': 0,
 '1': 0,
 '10': 0,
 '11': 0,
 '2': 0,
 '3': 0,
 '4': 0,
 '5': 0,
 '6': 0,
 '7': 0,
 '8': 0,
 '9': 0}