# In [None]:

import os
import pandas as pd
import numpy as np

# ---------------------------------------------------------------------
# Question #1
# ---------------------------------------------------------------------


def get_assignment_names(grades):
    '''
    Group the assignment columns of a dataframe like grades by syllabus area.

    The result is a dictionary whose keys are the general areas of the
    syllabus: lab, project, midterm, final, disc, checkpoint.

    Each value is the list of column names of that type, in the order the
    columns appear in ``grades``. A column is picked up when its name
    contains the area keyword and does not contain '-' (which drops
    companion columns such as 'lab01 - Max Points'). Project names must
    additionally not contain '_'. The midterm and final entries are always
    ['Midterm'] and ['Final'].

    :Example:
    >>> grades_fp = os.path.join('data', 'grades.csv')
    >>> grades = pd.read_csv(grades_fp)
    >>> names = get_assignment_names(grades)
    >>> set(names.keys()) == {'lab', 'project', 'midterm', 'final', 'disc', 'checkpoint'}
    True
    >>> names['final'] == ['Final']
    True
    >>> 'project02' in names['project']
    True
    '''
    columns = grades.columns

    # Masks shared by several areas: names without '-' and names without '_'.
    without_dash = columns.str.contains('-') == False
    without_underscore = columns.str.contains('_') == False

    def has_keyword(keyword):
        # Boolean mask of the columns whose name contains ``keyword``.
        return columns.str.contains(keyword) == True

    return {
        'lab': list(columns[has_keyword('lab') & without_dash]),
        'project': list(
            columns[has_keyword('project') & without_dash & without_underscore]
        ),
        'midterm': ['Midterm'],
        'final': ['Final'],
        'checkpoint': list(columns[has_keyword('checkpoint') & without_dash]),
        'disc': list(columns[has_keyword('disc') & without_dash]),
    }


# ---------------------------------------------------------------------
# Question #2
# ---------------------------------------------------------------------


def projects_total(grades):
    '''
    projects_total takes in grades and computes the total project grade
    for the quarter according to the syllabus.
    The output Series should contain values between 0 and 1.

    Every project is weighted equally. A project's score is the points
    earned (the project column plus its ``<project>_free_response`` column
    when one exists) divided by the matching ``... - Max Points`` total.
    Missing scores count as zero.

    :param grades: dataframe like grades.
    :returns: Series of project totals that shares the index of ``grades``.
        When ``grades`` has no project columns the result is all zeros.

    :Example:
    >>> grades_fp = os.path.join('data', 'grades.csv')
    >>> grades = pd.read_csv(grades_fp)
    >>> out = projects_total(grades)
    >>> np.all((0 <= out) & (out <= 1))
    True
    >>> 0.7 < out.mean() < 0.9
    True
    '''
    columns = grades.columns
    projects = get_assignment_names(grades)['project']

    # Accumulate on the caller's own index: a fresh 0..n-1 index would
    # misalign (and produce NaN) for any dataframe not indexed that way.
    total = pd.Series(0.0, index=grades.index)
    if not projects:
        return total

    for project in projects:
        free_response = project + '_free_response'
        earned = grades[project].fillna(0)
        possible = grades[project + ' - Max Points']
        if free_response in columns:
            earned = earned.add(grades[free_response].fillna(0))
            possible = possible.add(grades[free_response + ' - Max Points'])
        total = total + earned.divide(possible, fill_value=0) / len(projects)
    return total


# ---------------------------------------------------------------------
# Question # 3
# ---------------------------------------------------------------------


def last_minute_submissions(grades):
    """
    last_minute_submissions takes in the dataframe grades and returns a
    Series indexed by lab assignment that contains the number of
    submissions that were turned in on time by the student, yet marked
    'late' by Gradescope.

    A submission is counted when the helper ``cal`` returns True for its
    lateness value. The lateness columns are the ones whose name contains
    both 'lab' and 'Lateness'; they are matched to the lab names by
    position.

    :Example:
    >>> fp = os.path.join('data', 'grades.csv')
    >>> grades = pd.read_csv(fp)
    >>> out = last_minute_submissions(grades)
    >>> isinstance(out, pd.Series)
    True
    >>> np.all(out.index == ['lab0%d' % d for d in range(1,10)])
    True
    >>> (out > 0).sum()
    8
    """
    columns = grades.columns
    mentions_lab = columns.str.contains('lab') == True
    mentions_lateness = columns.str.contains('Lateness') == True
    lateness_columns = columns[mentions_lab & mentions_lateness]

    # One count per lab lateness column, in column order.
    counts = [
        (grades[name].apply(cal) == True).sum()
        for name in lateness_columns
    ]
    return pd.Series(counts, index=get_assignment_names(grades)['lab'])

def cal(st):
    """
    Helper: tell whether a lateness string falls in the last-minute window.

    :param st: lateness formatted as 'H:M:S'.
    :returns: True when the lateness is more than 0 seconds and at most
        36000 seconds (10 hours), False otherwise.
    """
    parts = st.split(':')
    hours, minutes, seconds = int(parts[0]), int(parts[1]), int(parts[2])
    elapsed = 3600 * hours + 60 * minutes + seconds
    return 0 < elapsed <= 36000

# ---------------------------------------------------------------------
# Question #4
# ---------------------------------------------------------------------

def lateness_penalty(col):
    """
    lateness_penalty takes in a 'lateness' column and returns
    a column of penalties according to the syllabus.

    Each entry of ``col`` is passed to the helper ``calculate`` and the
    results are returned as a Series with the same index as ``col``.

    :Example:
    >>> fp = os.path.join('data', 'grades.csv')
    >>> col = pd.read_csv(fp)['lab01 - Lateness (H:M:S)']
    >>> out = lateness_penalty(col)
    >>> isinstance(out, pd.Series)
    True
    >>> set(out.unique()) <= {1.0, 0.9, 0.8, 0.5}
    True
    """
    penalties = col.apply(calculate)
    return penalties

def calculate(st):
    """
    Helper: convert one lateness string into a score multiplier.

    :param st: lateness formatted as 'H:M:S' (hours may exceed 24).
    :returns: 1.0 when the submission is on time or at most 10 hours
        (36000 s) late, 0.9 when at most one week (604800 s) late,
        0.8 when at most two weeks (1209600 s) late, and 0.5 otherwise.
    """
    parts = st.split(':')
    seconds = int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])

    # Thresholds are checked in increasing order so each branch is reachable
    # and every input maps to exactly one multiplier (never None).
    if seconds <= 36000:
        return 1.0
    if seconds <= 604800:
        return 0.9
    if seconds <= 1209600:
        return 0.8
    return 0.5

# ---------------------------------------------------------------------
# Question #5
# ---------------------------------------------------------------------

def process_labs(grades):
    """
    process_labs that takes in a dataframe like grades and returns
    a dataframe of processed lab scores. The output should:
      * share the same index as grades,
      * have columns given by the lab assignment names (e.g. lab01,...lab10)
      * have values representing the lab grades for each assignment,
        adjusted for Lateness and scaled to a score between 0 and 1.

    For every lab ``X`` the score is
    ``X * lateness_penalty('X - Lateness (H:M:S)') / 'X - Max Points'``;
    missing results are reported as 0. The input dataframe is left
    unmodified, so the function can be called repeatedly on the same
    ``grades``.

    :Example:
    >>> fp = os.path.join('data', 'grades.csv')
    >>> grades = pd.read_csv(fp)
    >>> out = process_labs(grades)
    >>> out.columns.tolist() == ['lab%02d' % x for x in range(1,10)]
    True
    >>> np.all((0.65 <= out.mean()) & (out.mean() <= 0.90))
    True
    """
    labs = get_assignment_names(grades)['lab']

    # Build the result in a fresh frame: writing the penalties and scaled
    # scores back into ``grades`` would corrupt the caller's data and make
    # a second call fail on the already-converted lateness columns.
    scores = {}
    for lab in labs:
        penalty = lateness_penalty(grades[lab + ' - Lateness (H:M:S)'])
        scores[lab] = grades[lab] * penalty / grades[lab + ' - Max Points']

    return pd.DataFrame(scores, index=grades.index).fillna(0)


# ---------------------------------------------------------------------
# Question #6
# ---------------------------------------------------------------------

def lab_total(processed):
    """
    lab_total takes in dataframe of processed assignments (like the output of
    Question 5) and computes the total lab grade for each student according to
    the syllabus (returning a Series).

    Your answers should be proportions between 0 and 1.

    The total is the average of a student's lab scores after dropping the
    single lowest one, as the example below shows
    ((0.90 + 1.0) / 2 == 0.95). Missing scores count as zero. With only one
    lab there is nothing to drop and that score is returned; with no lab
    columns the result is all zeros.

    :param processed: dataframe with one column per lab, values in [0, 1].
    :returns: Series of lab totals sharing the index of ``processed``.

    :Example:
    >>> cols = 'lab01 lab02 lab03'.split()
    >>> processed = pd.DataFrame([[0.2, 0.90, 1.0]], index=[0], columns=cols)
    >>> np.isclose(lab_total(processed), 0.95).all()
    True
    """
    scores = processed.fillna(0)
    n_labs = scores.shape[1]

    if n_labs == 0:
        return pd.Series(0.0, index=scores.index)
    if n_labs == 1:
        return scores.iloc[:, 0].astype(float)

    # Sum of all labs minus the lowest, averaged over the remaining ones.
    return (scores.sum(axis=1) - scores.min(axis=1)) / (n_labs - 1)


# ---------------------------------------------------------------------
# Question # 7
# ---------------------------------------------------------------------

def total_points(grades):
    """
    total_points takes in grades and returns the final
    course grades according to the syllabus. Course grades
    should be proportions between zero and one.

    NOTE: not implemented yet. The body is a placeholder that returns the
    Ellipsis object, so the example below does not pass as written.

    :Example:
    >>> fp = os.path.join('data', 'grades.csv')
    >>> grades = pd.read_csv(fp)
    >>> out = total_points(grades)
    >>> np.all((0 <= out) & (out <= 1))
    True
    >>> 0.7 < out.mean() < 0.9
    True
    """

    # TODO: placeholder -- replace with the course total described above.
    return ...


def final_grades(total):
    """
    final_grades takes in the final course grades
    as above and returns a Series of letter grades
    given by the standard cutoffs.

    Cutoffs used (lower bound inclusive): A >= 0.9, B >= 0.8, C >= 0.7,
    D >= 0.6, and F for anything lower. Missing scores compare false
    against every cutoff and therefore map to 'F'.

    :param total: Series of course grades as proportions.
    :returns: Series of letter grades with the same index as ``total``.

    :Example:
    >>> out = final_grades(pd.Series([0.92, 0.81, 0.41]))
    >>> np.all(out == ['A', 'B', 'F'])
    True
    """
    scores = total.to_numpy(dtype=float)

    # np.select takes the first matching condition, so the cutoffs must be
    # listed from highest to lowest.
    conditions = [scores >= 0.9, scores >= 0.8, scores >= 0.7, scores >= 0.6]
    letters = np.select(conditions, ['A', 'B', 'C', 'D'], default='F')
    return pd.Series(letters, index=total.index, name=total.name)


def letter_proportions(grades):
    """
    letter_proportions takes in the dataframe grades
    and outputs a Series that contains the proportion
    of the class that received each grade.

    NOTE: not implemented yet. The body is a placeholder that returns the
    Ellipsis object, so the example below does not pass as written.

    :Example:
    >>> fp = os.path.join('data', 'grades.csv')
    >>> grades = pd.read_csv(fp)
    >>> out = letter_proportions(grades)
    >>> np.all(out.index == ['B', 'C', 'A', 'D', 'F'])
    True
    >>> out.sum() == 1.0
    True
    """

    # TODO: placeholder -- replace with the per-letter proportions described above.
    return ...

# ---------------------------------------------------------------------
# Question # 8
# ---------------------------------------------------------------------

def simulate_pval(grades, N):
    """
