In [None]:
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
import plotly.graph_objects as go
from scipy import stats

# Directory holding the de-identified Fall 2019 CSE 8A exports. The per-course
# files below are resolved against it so the location is spelled out only once.
DATA_DIR = "Data/Fall 2019/CSE 8A Fall 2019 DeID"

# Pre-course survey (read for each student's transfer status) and the gradebook.
presurvey = DATA_DIR + "/presurvey.csv"
gradebook = DATA_DIR + "/gradebook.csv"

# Course code used to select this course's rows from the shared course-grade file.
course = "8A"

<h1> CSE 8A: </h1>

In [None]:
# Reflection surveys R1.csv .. R8.csv, kept in file order so that
# reflections[k] holds the responses read from file R(k+1).
reflections = [
    pd.read_csv("Data/Fall 2019/CSE 8A Fall 2019 DeID/R" + str(week) + ".csv")
    for week in range(1, 9)
]

<b> Produces the Transfer and Native Student DataFrames for the given category:</b>

In [None]:
def scaled_categories(reflections, category, label, x, scale1, scale5):
    """Collect one survey question across reflections and split it by transfer status.

    For each of the first ``x`` reflections the ``category`` column is converted
    to numbers (``scale1`` -> 1, "2" -> 2, "3" -> 3, "4" -> 4, ``scale5`` -> 5)
    and stored under the column name ``label + str(i)``. The per-reflection
    columns are merged on "DeID" and then merged with the transfer flag read
    from the module-level ``presurvey`` CSV.

    Args:
        reflections: Sequence of DataFrames, each with a "DeID" column and a
            ``category`` column. The frames are not modified.
        category: Name of the question column to extract.
        label: Prefix of the output columns.
        x: Number of leading entries of ``reflections`` to use (at least 1).
        scale1: Response text that stands for 1.
        scale5: Response text that stands for 5.

    Returns:
        dict with keys 'transfer' and 'native'; each value is a DataFrame with
        the columns "DeID", "Transfer Status" and ``label0 .. label<x-1>``.

    Raises:
        ValueError: If ``x`` is smaller than 1.
    """
    if x < 1:
        raise ValueError("x must be at least 1 to build the per-reflection columns")

    # Applied in this order, one replacement at a time, on a private copy of the
    # column so the caller's reflection DataFrames keep their original text.
    replacements = ((scale1, 1), ("2", 2), ("3", 3), ("4", 4), (scale5, 5))

    categoryIssue = None
    for i in range(x):
        answers = reflections[i][category]
        for text, number in replacements:
            answers = answers.replace(to_replace=text, value=number)

        cat = DataFrame({"DeID": reflections[i]["DeID"], label + str(i): answers})
        if categoryIssue is None:
            categoryIssue = cat
        else:
            categoryIssue = pd.merge(categoryIssue, cat, on="DeID")

    # A student counts as a transfer only when the presurvey answer is exactly "Yes".
    df = pd.read_csv(presurvey)
    status = df["Did.you.enter.UCSD.as.a.transfer.student.from.another..2.year.or.4.year..college.or.university."] == "Yes"
    transferStatus = DataFrame({"DeID": df["DeID"], "Transfer Status": status})

    mergedData = pd.merge(transferStatus, categoryIssue, on="DeID")
    transfer = mergedData[mergedData["Transfer Status"] == True]
    native = mergedData[mergedData["Transfer Status"] == False]

    return {'transfer' : transfer, 'native' : native}

<b> Produces the Diverging Graph Plot : </b>

In [None]:
def _likert_percentages(frame):
    """Return the percentage of answers equal to 1..5 over all answer columns of ``frame``."""
    counts = [0, 0, 0, 0, 0]
    # Every column except the two bookkeeping columns holds answers.
    answers = frame.drop(columns=["DeID", "Transfer Status"], errors="ignore")
    for value in answers.to_numpy().ravel():
        if pd.isna(value):
            # A missing answer cannot be placed on the scale; leave it out.
            continue
        counts[int(value) - 1] += 1

    total = sum(counts)
    if total == 0:
        # No answers at all: report an all-zero distribution instead of dividing by zero.
        return [0.0] * 5
    return [count / (total / 100) for count in counts]


def DivergingGraphPlot(transfer, native, label, graphTitle, X_title, Y_title):
    """Build a horizontal stacked bar chart of the 1-5 answer distribution per group.

    Args:
        transfer: DataFrame of transfer students; every column other than
            "DeID" and "Transfer Status" is read as answers on a 1-5 scale.
        native: DataFrame of native students with the same layout.
        label: Not used by this function; kept so existing calls still work.
        graphTitle: Title of the figure.
        X_title: Title of the x axis.
        Y_title: Title of the y axis.

    Returns:
        The plotly figure. The percentage table is also shown with ``display``.
    """
    transferDistribution = _likert_percentages(transfer)
    nativeDistribution = _likert_percentages(native)

    d = {
    'Who' : ["Transfer", "Native"],
    '1 (Not at all)' : [transferDistribution[0],nativeDistribution[0]],
    '2' : [transferDistribution[1],nativeDistribution[1]],
    '3' : [transferDistribution[2],nativeDistribution[2]],
    '4' : [transferDistribution[3],nativeDistribution[3]],
    '5 (Significant)' :[transferDistribution[4],nativeDistribution[4]]
    }
    df = pd.DataFrame(d)
    display(df)

    # One bar trace per scale value; 'relative' bar mode stacks them per group.
    fig = go.Figure()
    for col in df.columns[1:]:
        fig.add_trace(go.Bar(x= df[col],
                             y =df['Who'],
                             orientation='h',
                             name= col,
                             hovertemplate="%{y}: %{x}"))

    fig.update_layout(title=graphTitle,
                      xaxis_title=X_title,
                      yaxis_title=Y_title,
                      legend_title="Scale",
                      barmode='relative',
                      height=400,
                      width=700,
                      yaxis_autorange='reversed',
                      bargap=0.01,
                      legend_orientation ='h',
                      legend_x=0.05, legend_y=1.2
                     )

    return fig

<b> Produces Box Plots and Removes Outliers </b>

In [None]:
def _drop_outliers(values):
    """Return the entries of ``values`` that are non-negative and have |z-score| <= 3."""
    z_scores = np.asarray(stats.zscore(values))
    keep = (np.abs(z_scores) <= 3) & (values >= 0).to_numpy()
    return values[keep]


def _show_box_plot(transfer_values, native_values, title):
    """Draw and show a horizontal, notched box plot comparing the two groups."""
    fig = plt.figure(figsize =(10, 7))
    ax = fig.add_subplot(111)
    bp = ax.boxplot([transfer_values, native_values],
                    patch_artist = True, notch = True, vert = False)

    # Box fill: first box (transfer) blue, second box (native) green.
    for patch, color in zip(bp['boxes'], ['#0000FF', '#00FF00']):
        patch.set_facecolor(color)

    for whisker in bp['whiskers']:
        whisker.set(color ='#8B008B', linewidth = 1.5, linestyle =":")

    for cap in bp['caps']:
        cap.set(color ='#8B008B', linewidth = 2)

    for median in bp['medians']:
        median.set(color ='red', linewidth = 3)

    for flier in bp['fliers']:
        flier.set(marker ='D', color ='#e7298a', alpha = 0.5)

    # The plot is horizontal, so the group names label the y axis.
    ax.set_yticklabels(['Transfer', 'Native'])
    plt.title(title)

    # Keep ticks on the bottom and left axes only.
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    plt.xlim(-6, 30)
    plt.show()


def OutlierLineGraph(transfer, native, label, graphTitle, X_title, Y_title, x_label, x):
    """Compare transfer and native means per reflection after removing outliers.

    For every column ``label + str(i)`` with ``i`` in ``range(x)`` the values
    with |z-score| > 3 or below zero are dropped, the group means are recorded
    and a Mann-Whitney U test is run on the remaining values. Box plots of the
    unfiltered values are shown for the first and the last column, and a line
    chart of the means is drawn on the current pyplot figure.

    Args:
        transfer: DataFrame of transfer students.
        native: DataFrame of native students.
        label: Column-name prefix.
        graphTitle: Title of the line chart.
        X_title: x-axis label of the line chart.
        Y_title: y-axis label of the line chart.
        x_label: Tick labels for the line chart, one per column.
        x: Number of columns to process.

    Returns:
        DataFrame with the columns "Week", "Transfer Mean", "Native Mean",
        "Difference", "p-value (u-test)" and "Test Statistic (u-test)".
    """
    transferIssues = []
    nativeIssues = []
    difference = []
    utestStatistics = []
    pValuesU = []
    for i in range(x):
        column = label + str(i)
        transferRefined = _drop_outliers(transfer[column])
        nativeRefined = _drop_outliers(native[column])

        if i == 0 or i == x - 1:
            # The box plots deliberately use the unfiltered columns.
            _show_box_plot(transfer[column], native[column],
                           "Week " + str(i+2) + " Box Plot")

        transferIssues.append(transferRefined.mean())
        nativeIssues.append(nativeRefined.mean())
        difference.append(transferIssues[i] - nativeIssues[i])
        utest_results = stats.mannwhitneyu(transferRefined, nativeRefined)
        utestStatistics.append(utest_results[0])
        pValuesU.append(utest_results[1])

    df = DataFrame({"Week" : Series(x_label),
                    "Transfer Mean" : Series(transferIssues),
                    "Native Mean" : Series(nativeIssues),
                    "Difference" : difference,
                    "p-value (u-test)" : pValuesU,
                    "Test Statistic (u-test)" : utestStatistics})

    plt.ylim(0, 14)
    # "-o" = solid line with circle markers; the colour comes from ``color`` only.
    plt.plot(x_label, transferIssues, "-o", color='blue')
    plt.plot(x_label, nativeIssues, "-o", color='green')
    plt.legend(['transfers','natives'])
    plt.suptitle(graphTitle, fontsize=14)
    plt.xlabel(X_title, fontsize=10)
    plt.ylabel(Y_title, fontsize=10)

    return df

<b> Produces the Line Graph: </b>

In [None]:
def plotLineGraph(transfer, native, label, graphTitle, X_title, Y_title, x_label, x):
    """Plot per-reflection group means and run a Mann-Whitney U test for each.

    The columns ``label + str(i)`` for ``i`` in ``range(x)`` are read from both
    frames. The line chart is drawn on the current pyplot figure.

    Args:
        transfer: DataFrame of transfer students.
        native: DataFrame of native students.
        label: Column-name prefix.
        graphTitle: Title of the chart.
        X_title: x-axis label.
        Y_title: y-axis label.
        x_label: Tick labels, one per column.
        x: Number of columns to process.

    Returns:
        DataFrame with the columns "Week", "Transfer Mean", "Native Mean",
        "Difference", "p-value (u-test)" and "Test Statistic (u-test)".
    """
    transferIssues = []
    nativeIssues = []
    difference = []
    utestStatistics = []
    pValuesU = []
    for i in range(x):
        column = label + str(i)
        transferIssues.append(transfer[column].mean())
        nativeIssues.append(native[column].mean())
        difference.append(transferIssues[i] - nativeIssues[i])
        utest_results = stats.mannwhitneyu(transfer[column], native[column], use_continuity=False)
        utestStatistics.append(utest_results[0])
        pValuesU.append(utest_results[1])

    df = DataFrame({"Week" : Series(x_label),
                    "Transfer Mean" : Series(transferIssues),
                    "Native Mean" : Series(nativeIssues),
                    "Difference" : difference,
                    "p-value (u-test)" : pValuesU,
                    "Test Statistic (u-test)" : utestStatistics})

    # "-o" = solid line with circle markers; the colour comes from ``color`` only.
    plt.plot(x_label, transferIssues, "-o", color='blue')
    plt.plot(x_label, nativeIssues, "-o", color='green')
    plt.ylim(0.5, 5.5)
    plt.legend(['transfers','natives'])
    plt.suptitle(graphTitle, fontsize=14)
    plt.xlabel(X_title, fontsize=10)
    plt.ylabel(Y_title, fontsize=10)

    return df

<h4> PA Score Calculator </h4>

In [None]:
def PA_scores (path, number1, number2, label, graphLabel, X_label, Y_label, x_label):
    """Compare transfer and native scores on the columns ``label<number1>`` .. ``label<number2>``.

    The score columns are read from the CSV at ``path`` and multiplied by 100.
    Students are labelled through the module-level ``presurvey`` file and kept
    only if the course-grade file lists them for the module-level ``course``
    without a "Dropped" grade or an "N" enrollment flag. A line chart and a
    grouped bar chart of the group means are shown.

    Args:
        path: CSV with a "DeID" column and the score columns.
        number1: First score number (inclusive).
        number2: Last score number (inclusive).
        label: Prefix of the score columns, e.g. "PA".
        graphLabel: Title of both charts.
        X_label: x-axis label of both charts.
        Y_label: y-axis label of both charts.
        x_label: Tick labels, one per score column.

    Returns:
        dict with 'transfer' and 'native' (the per-student DataFrames of each
        group) and 'df' (per-column means, their difference and the t-test
        statistic and p-value).
    """
    score_columns = [label + str(i) for i in range(number1, number2 + 1)]

    # Take the identifier and the score columns straight from the gradebook.
    # Selecting them keeps one output row per gradebook row even if a DeID repeats.
    gradebook_df = pd.read_csv(path)
    data = gradebook_df[["DeID"] + score_columns].copy()
    data[score_columns] = data[score_columns] * 100

    # A student counts as a transfer only when the presurvey answer is exactly "Yes".
    df = pd.read_csv(presurvey)
    status = df["Did.you.enter.UCSD.as.a.transfer.student.from.another..2.year.or.4.year..college.or.university."] == "Yes"
    transferStatus = DataFrame({"DeID": df["DeID"], "Transfer Status": status})

    course_info = pd.read_csv("Data/Fall 2019/CSE Alvarado Course Grade Data.csv")
    course_info = course_info[course_info["Grade (End of Term)"] != "Dropped"]
    course_info = course_info[course_info["Enrollment Flag (End of Term)"] != "N"]
    course_info = course_info[course_info["Enrollment Flag (3rd Week)"] != "N"]
    course_info = course_info[course_info["Course"] == course]

    mergedData = pd.merge(transferStatus, data, on="DeID")
    mergedData = pd.merge(mergedData, course_info, on="DeID")
    transfer = mergedData[mergedData["Transfer Status"] == True]
    native = mergedData[mergedData["Transfer Status"] == False]
    transfer = transfer.drop(["Course", "Section ID"], axis = 1)
    native = native.drop(["Course", "Section ID"], axis = 1)

    transferIssues = []
    nativeIssues = []
    difference = []
    ttestStatistics = []
    pValuesT = []
    for column in score_columns:
        transferIssues.append(transfer[column].mean())
        nativeIssues.append(native[column].mean())
        difference.append(transferIssues[-1] - nativeIssues[-1])
        ttest_results = stats.ttest_ind(transfer[column], native[column])
        ttestStatistics.append(ttest_results[0])
        pValuesT.append(ttest_results[1])

    df = DataFrame({"Week" : Series(x_label),
                    "Transfer Mean" : Series(transferIssues),
                    "Native Mean" : Series(nativeIssues),
                    "Difference" : difference,
                    "p-value (t-test)" : pValuesT,
                    "Test Statistic (t-test)" : ttestStatistics})

    # Line chart of the means. "-o" = solid line with circle markers; the
    # colour comes from ``color`` only.
    plt.plot(x_label, transferIssues, "-o", color='blue')
    plt.plot(x_label, nativeIssues, "-o", color='green')
    plt.ylim(0, 105)
    plt.legend(['transfers','natives'])
    plt.suptitle(graphLabel, fontsize=14)
    plt.xlabel(X_label, fontsize=10)
    plt.ylabel(Y_label, fontsize=10)
    plt.show()

    # Grouped bar chart of the same means: native bars sit one bar width to the
    # right of the transfer bars and the tick is centred between the two.
    x = np.arange(len(transferIssues))
    bar_width = 0.3
    plt.bar(x,transferIssues,width = bar_width,color = 'blue', zorder = 2)
    plt.bar(x + bar_width, nativeIssues, width = bar_width, color = 'orange', zorder = 2)
    plt.ylim(0, 105)
    plt.xticks(x+bar_width*0.5,x_label)
    plt.legend(['transfers','natives'])
    plt.suptitle(graphLabel, fontsize=14)
    plt.xlabel(X_label, fontsize=10)
    plt.ylabel(Y_label, fontsize=10)
    plt.show()

    return {'transfer' : transfer, 'native' : native, 'df' : df}

<b> Attendance Calculator </b>

In [None]:
def attendance_scores (path, label, graphLabel, X_label, Y_label, x_label, search):
    """Compare transfer and native means on every column whose name contains ``search``.

    The matching columns are read from the CSV at ``path``. Students are
    labelled through the module-level ``presurvey`` file and kept only if the
    course-grade file lists them for the module-level ``course`` without a
    "Dropped" grade or an "N" enrollment flag. The group means, multiplied by
    100, are drawn as a line chart on a new pyplot figure.

    Args:
        path: CSV with a "DeID" column and the columns to compare.
        label: Not used by this function; kept so existing calls still work.
        graphLabel: Title of the chart.
        X_label: x-axis label.
        Y_label: y-axis label.
        x_label: List that receives one tick label ("L1", "L2", ...) per
            matching column. It is appended to in place, so pass an empty list.
        search: Substring that selects the columns.

    Returns:
        dict with 'transfer' and 'native' (the per-student DataFrames of each
        group), 'df' (the per-column means) and 'plt' (the pyplot module, so
        the caller can close the figure).
    """
    gradebook_df = pd.read_csv(path)

    # Column names are collected once and reused for both the selection and the means.
    matching_columns = [col for col in gradebook_df if search in col]
    x_label.extend("L" + str(n) for n in range(1, len(matching_columns) + 1))

    # Selecting the columns keeps one output row per gradebook row even if a DeID repeats.
    data = gradebook_df[["DeID"] + matching_columns]

    # A student counts as a transfer only when the presurvey answer is exactly "Yes".
    df = pd.read_csv(presurvey)
    status = df["Did.you.enter.UCSD.as.a.transfer.student.from.another..2.year.or.4.year..college.or.university."] == "Yes"
    transferStatus = DataFrame({"DeID": df["DeID"], "Transfer Status": status})

    course_info = pd.read_csv("Data/Fall 2019/CSE Alvarado Course Grade Data.csv")
    course_info = course_info[course_info["Grade (End of Term)"] != "Dropped"]
    course_info = course_info[course_info["Enrollment Flag (End of Term)"] != "N"]
    course_info = course_info[course_info["Enrollment Flag (3rd Week)"] != "N"]
    course_info = course_info[course_info["Course"] == course]

    mergedData = pd.merge(transferStatus, data, on="DeID")
    mergedData = pd.merge(mergedData, course_info, on="DeID")
    transfer = mergedData[mergedData["Transfer Status"] == True]
    native = mergedData[mergedData["Transfer Status"] == False]
    transfer = transfer.drop(["Course", "Section ID"], axis = 1)
    native = native.drop(["Course", "Section ID"], axis = 1)

    transferIssues = [transfer[col].mean()*100 for col in matching_columns]
    nativeIssues = [native[col].mean()*100 for col in matching_columns]

    df = DataFrame({"Lecture" : Series(x_label),
                    "Transfer Mean" : Series(transferIssues),
                    "Native Mean" : Series(nativeIssues)})

    plt.figure(figsize=(10, 5))
    # "-o" = solid line with circle markers; the colour comes from ``color`` only.
    plt.plot(x_label, transferIssues, "-o", color='blue')
    plt.plot(x_label, nativeIssues, "-o", color='green')
    plt.ylim(0, 100)
    plt.legend(['transfers','natives'])
    plt.suptitle(graphLabel, fontsize=14)
    plt.xlabel(X_label, fontsize=10)
    plt.ylabel(Y_label, fontsize=10)

    return {'transfer' : transfer, 'native' : native, 'df' : df, 'plt' :plt}

<h3> PA Scores </h3>

In [None]:
path = gradebook
number1 = 1
number2 = 8
X_label = ""
Y_label = "PA Scores"
x_label = ["PA1", "PA2", "PA3", "PA4", "PA5", "PA6", "PA7", "PA8"]
graphLabel = "PA Scores"
label = "PA"
data = PA_scores(path, number1, number2, label, graphLabel, X_label, Y_label, x_label)
transfer = data['transfer']
native = data['native']
df = data['df']
display(df)

# Average only the PA columns. The frames returned by PA_scores also carry
# DeID, "Transfer Status" and the course-grade columns, which must not enter
# a student's mean score.
pa_columns = [label + str(i) for i in range(number1, number2 + 1)]

totalT = transfer[pa_columns].mean(axis=1)
PAscores_transfers = totalT.mean()

totalN = native[pa_columns].mean(axis=1)
PAscores_natives = totalN.mean()

<h3> Lecture Attendance </h3>

In [None]:
path = gradebook
X_label = "Lecture"
search = "Clicker"
Y_label = "Attendance"
x_label = []
graphLabel = "Attendance Percentage"
label = "Attendance"
data = attendance_scores(path, label, graphLabel, X_label, Y_label, x_label, search)

transfer = data['transfer']
native = data['native']
df = data['df']

display(df)

# Sum only the clicker columns. The frames returned by attendance_scores also
# carry DeID, "Transfer Status" and the course-grade columns, which must not be
# counted as attendance.
# NOTE(review): assumes no course-grade column name contains "Clicker" - confirm.
clicker_columns = [col for col in transfer.columns if search in col]

# NOTE(review): 22 is taken to be the number of lectures - confirm it matches
# the number of clicker columns in the gradebook.
totalTransfer = transfer[clicker_columns].sum(axis = 1)
AverageTransfersAttendance = (totalTransfer/22).clip(upper = 1).mean()

totalNative = native[clicker_columns].sum(axis = 1)
AverageNativesAttendance = (totalNative/22).clip(upper = 1).mean()

<h3> Final Exam: </h3>

In [None]:
# Final exam: reuse attendance_scores to get the group means for every
# gradebook column whose name contains "Final".
path = gradebook
X_label = "Final"
search = X_label
Y_label = "Attendance"
graphLabel = "Final Exam"
label = "Attendance"
x_label = []
data = attendance_scores(path, label, graphLabel, X_label, Y_label, x_label, search)

transfer, native, df = data['transfer'], data['native'], data['df']
# attendance_scores opens a line-chart figure; it is not wanted for this section.
data['plt'].close()

final_column = "Final.exam.percentage..out.of.100.or.between.0.and.1."
final_ttest = stats.ttest_ind(native[final_column], transfer[final_column])
df["Test Statistics"] = final_ttest[0]
df["p-Value"] = final_ttest[1]
display(df[["Transfer Mean", "Native Mean", "Test Statistics", "p-Value"]])

transfers_final_avg = df["Transfer Mean"]
natives_final_avg = df["Native Mean"]

<h3> Overall Grade: </h3>

In [None]:
# Overall grade: reuse attendance_scores to get the group means for every
# gradebook column whose name contains "Overall".
path = gradebook
X_label = "Overall"
search = "Overall"
Y_label = "Attendance"
graphLabel = "Overall Grade"
label = "Attendance"
x_label = []
data = attendance_scores(path, label, graphLabel, X_label, Y_label, x_label, search)

transfer, native, df = data['transfer'], data['native'], data['df']
# attendance_scores opens a line-chart figure; it is not wanted for this section.
data['plt'].close()

overall_column = "Overall.weighted.course.percentage..out.of.100.or.between.0.and.1..above.1.OK.if.E.C.."
overall_ttest = stats.ttest_ind(native[overall_column], transfer[overall_column])
df["Test Statistics"] = overall_ttest[0]
df["p-Value"] = overall_ttest[1]
display(df[["Transfer Mean", "Native Mean", "Test Statistics", "p-Value"]])

transfers_overall_avg = df["Transfer Mean"]
natives_overall_avg = df["Native Mean"]

<h3> Overall Performance Graph </h3>

In [None]:
# The attendance and PA averages are plain numbers, while the final-exam and
# overall averages are the "Transfer Mean" / "Native Mean" columns of a summary
# table. Reduce every entry to a single float (the first value of a column) so
# plt.bar receives a flat list of bar heights.
y1 = [float(np.asarray(value, dtype = float).ravel()[0])
      for value in (AverageTransfersAttendance*100, PAscores_transfers,
                    transfers_final_avg, transfers_overall_avg)] # bar heights for transfers
y2 = [float(np.asarray(value, dtype = float).ravel()[0])
      for value in (AverageNativesAttendance*100, PAscores_natives,
                    natives_final_avg, natives_overall_avg)] # bar heights for natives

x = np.arange(len(y1)) # one x position per course component (four in total)

bar_width = 0.3
plt.bar(x,y1,width = bar_width,color = 'blue', zorder = 2)
plt.bar(x + bar_width, y2, width = bar_width, color = 'orange', zorder = 2)

# centre each tick between the transfer bar and the native bar
plt.xticks(x+bar_width*0.5,['Attendance','PA scores','Final exam','Overall course'])
plt.xlabel('CSE 8A course component', labelpad = 15)
plt.ylabel('average percentage scored')

#adding the legend:
blue_patch = mpatches.Patch(color = 'blue', label = 'transfers')
orange_patch = mpatches.Patch(color = 'orange', label = 'natives')
plt.legend(handles = [blue_patch,orange_patch],loc = (1.05,0.85))

plt.ylim(0,100)


plt.grid(axis = 'y')

plt.gca().spines['right'].set_color('none')
plt.gca().spines['top'].set_color('none')

plt.show()

<h3>Hours Spent on the Course</h3>

In [None]:
# Question analysed in this section and the column prefix for its weekly values.
category = "In.the.last.week..approximately.how.many.hours.did.you.spend.outside.of.class.time.working.on.work.for.this.course."
label = "# of Hours "

# Chart titles.
graphTitle = "Hours Spent"
X_title = "Percentage"
Y_title = "Student Type"

# Use the first 8 reflections; the text answers "1" and "5" map to 1 and 5.
x = 8
scale1, scale5 = "1", "5"

data = scaled_categories(reflections, category, label, x, scale1, scale5)
transfer, native = data['transfer'], data['native']

In [None]:
# Line chart (plus first/last box plots) of the hours reported per reflection.
graphLabel = "Hours Spent"
X_label = ""
Y_label = "# of Hours"
x_label = ["Week 2", "Week 3", "Week 4", "Week 5", "Week 6", "Week 7", "Week 8", "Week 9"]

df = OutlierLineGraph(transfer, native, label, graphLabel, X_label, Y_label, x_label, x)
display(df)

<h3> Confusion about the Assignment </h3>

In [None]:
# Question analysed in this section and the column prefix for its weekly values.
category = "In.the.past.week..on.a.scale.of.1.to.5..to.what.degree.did.each.of.the.following.interfere.with.your.ability.to.learn.and.complete.the.work.for.this.course..Confusion.specifically.about.the.assignment."
label = "Confusion"

# Chart titles.
graphTitle = "Confusion about the Assignment"
X_title = "Percentage"
Y_title = "Student Type"

# Use the first 8 reflections; the end points of the scale are stored as text.
x = 8
scale1, scale5 = "1 (Not at all)", "5 (Significantly)"

data = scaled_categories(reflections, category, label, x, scale1, scale5)
transfer, native = data['transfer'], data['native']

In [None]:
# Stacked distribution of the answers for each student group.
fig = DivergingGraphPlot(transfer, native, label, graphTitle, X_title, Y_title)
fig.show()

# Per-reflection group means with the u-test results.
graphLabel = graphTitle
X_label = ""
Y_label = "1(Not at All) ... 5(Significantly)"
x_label = ["Week 2", "Week 3", "Week 4", "Week 5", "Week 6", "Week 7", "Week 8", "Week 9"]

df = plotLineGraph(transfer, native, label, graphLabel, X_label, Y_label, x_label, x)
display(df)

<h3> Confusion about the Material </h3>

In [None]:
# Question analysed in this section and the column prefix for its weekly values.
category = "In.the.past.week..on.a.scale.of.1.to.5..to.what.degree.did.each.of.the.following.interfere.with.your.ability.to.learn.and.complete.the.work.for.this.course..Confusion.generally.about.the.material."
label = "Confusion"

# Chart titles.
graphTitle = "Confusion about the Material"
X_title = "Percentage"
Y_title = "Student Type"

# Use the first 8 reflections; the end points of the scale are stored as text.
x = 8
scale1, scale5 = "1 (Not at all)", "5 (Significantly)"

data = scaled_categories(reflections, category, label, x, scale1, scale5)
transfer, native = data['transfer'], data['native']

In [None]:
# Stacked distribution of the answers for each student group.
fig = DivergingGraphPlot(transfer, native, label, graphTitle, X_title, Y_title)
fig.show()

# Per-reflection group means with the u-test results.
graphLabel = graphTitle
X_label = ""
Y_label = "1(Not at All) ... 5(Significantly)"
x_label = ["Week 2", "Week 3", "Week 4", "Week 5", "Week 6", "Week 7", "Week 8", "Week 9"]

df = plotLineGraph(transfer, native, label, graphLabel, X_label, Y_label, x_label, x)
display(df)