In [None]:
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import env

In [None]:
# Show every row and column, and never truncate cell text, when displaying frames.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None is the supported "no limit" value; passing -1 has been deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)

### Acquire

In [None]:
# Connection URL for the curriculum_logs database; credentials come from the local env module.
url = f'mysql+pymysql://{env.user}:{env.password}@{env.host}/curriculum_logs'
# Every log row with its cohort's columns attached (the left join keeps log rows
# that have no matching cohort), ordered by date and then time.
query = '''
Select * from logs
left join cohorts on logs.cohort_id = cohorts.id
ORDER BY date ASC, time ASC;
'''
df = pd.read_sql(query, url)

In [None]:
print(df.shape)
df.info()

In [5]:
#Dropping columns with an inordinate number of nulls (rendering variable essentially useless)
def drop_columns(df):
    """Return a new frame without the columns this analysis does not use.

    Removes ``updated_at``, ``deleted_at``, ``slack`` and ``id``. The input
    frame is left untouched. A ``KeyError`` is raised (pandas default) if any
    of those columns is absent.
    """
    unused = ['updated_at', 'deleted_at', 'slack', 'id']
    return df.drop(columns=unused)


def handle_nulls(df):
    """Return a new frame containing only the rows of ``df`` that have no nulls.

    The share of rows retained can be checked with
    ``round(df.dropna().shape[0] / df.shape[0], 4)``.
    """
    return df.dropna()

def parse_path(path):
    """Split a request path on "/" into at most three topic levels.

    Returns a pandas Series keyed ``primary_topic``, ``subtopic`` and
    ``tertiary`` (in that order). Levels the path does not have are filled
    with the string ``'None'``; segments beyond the third are discarded.
    """
    levels = ('primary_topic', 'subtopic', 'tertiary')
    segments = path.split("/")[:len(levels)]
    # Pad short paths so every result carries all three keys.
    segments += ['None'] * (len(levels) - len(segments))
    return pd.Series(dict(zip(levels, segments)))

In [6]:
df = drop_columns(df)

In [7]:
df = handle_nulls(df)

In [10]:
# Convert the date-like columns to datetime64 and make cohort_id an integer.
# Bracket assignment is used on purpose: attribute-style assignment (df.col = ...)
# only updates a column that already exists and otherwise just sets a Python
# attribute on the frame, so a typo would go unnoticed.
for date_col in ['date', 'start_date', 'end_date', 'created_at']:
    df[date_col] = pd.to_datetime(df[date_col])
df['cohort_id'] = df['cohort_id'].astype(int)

In [11]:
tf = df.path.apply(parse_path)

In [12]:
df = pd.concat([df, tf], axis=1)

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 847329 entries, 0 to 900222
Data columns (total 14 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   date           847329 non-null  datetime64[ns]
 1   time           847329 non-null  object        
 2   path           847329 non-null  object        
 3   user_id        847329 non-null  int64         
 4   cohort_id      847329 non-null  int64         
 5   ip             847329 non-null  object        
 6   name           847329 non-null  object        
 7   start_date     847329 non-null  datetime64[ns]
 8   end_date       847329 non-null  datetime64[ns]
 9   created_at     847329 non-null  datetime64[ns]
 10  program_id     847329 non-null  float64       
 11  primary_topic  847329 non-null  object        
 12  subtopic       847329 non-null  object        
 13  tertiary       847329 non-null  object        
dtypes: datetime64[ns](4), float64(1), int64(2), object(7

In [14]:
df.head(20)

Unnamed: 0,date,time,path,user_id,cohort_id,ip,name,start_date,end_date,created_at,program_id,primary_topic,subtopic,tertiary
0,2018-01-26,09:55:03,/,1,8,97.105.19.61,Hampton,2015-09-22,2016-02-06,2016-06-14 19:52:26,1.0,,,
1,2018-01-26,09:56:02,java-ii,1,8,97.105.19.61,Hampton,2015-09-22,2016-02-06,2016-06-14 19:52:26,1.0,java-ii,,
2,2018-01-26,09:56:05,java-ii/object-oriented-programming,1,8,97.105.19.61,Hampton,2015-09-22,2016-02-06,2016-06-14 19:52:26,1.0,java-ii,object-oriented-programming,
3,2018-01-26,09:56:06,slides/object_oriented_programming,1,8,97.105.19.61,Hampton,2015-09-22,2016-02-06,2016-06-14 19:52:26,1.0,slides,object_oriented_programming,
4,2018-01-26,09:56:24,javascript-i/conditionals,2,22,97.105.19.61,Teddy,2018-01-08,2018-05-17,2018-01-08 13:59:10,2.0,javascript-i,conditionals,
5,2018-01-26,09:56:41,javascript-i/loops,2,22,97.105.19.61,Teddy,2018-01-08,2018-05-17,2018-01-08 13:59:10,2.0,javascript-i,loops,
6,2018-01-26,09:56:46,javascript-i/conditionals,3,22,97.105.19.61,Teddy,2018-01-08,2018-05-17,2018-01-08 13:59:10,2.0,javascript-i,conditionals,
7,2018-01-26,09:56:48,javascript-i/functions,3,22,97.105.19.61,Teddy,2018-01-08,2018-05-17,2018-01-08 13:59:10,2.0,javascript-i,functions,
8,2018-01-26,09:56:59,javascript-i/loops,2,22,97.105.19.61,Teddy,2018-01-08,2018-05-17,2018-01-08 13:59:10,2.0,javascript-i,loops,
9,2018-01-26,09:58:26,javascript-i/functions,4,22,97.105.19.61,Teddy,2018-01-08,2018-05-17,2018-01-08 13:59:10,2.0,javascript-i,functions,


### Retrieve observations after cohort end date

In [None]:
# Count log rows dated after the cohort's end date (True) versus on or before it (False)
(df.date > df.end_date).value_counts()

In [15]:
after_grad = df[(df.date > df.end_date)]

In [16]:
(after_grad.date > after_grad.end_date).value_counts()

True    199834
dtype: int64

In [17]:
# One frame of post-graduation log rows per program, selected by program_id.
program_ids = after_grad['program_id']
full_stack_php = after_grad.loc[program_ids == 1]
full_stack_java = after_grad.loc[program_ids == 2]
data_science = after_grad.loc[program_ids == 3]
front_end = after_grad.loc[program_ids == 4]

#### Php

In [None]:
full_stack_php

In [73]:
# Row counts per distinct page for this program, most-requested first.
(
    full_stack_php
    .groupby(['program_id', 'path', 'primary_topic', 'subtopic', 'tertiary'])
    .agg(['count'])
    .reset_index()
    .sort_values(by=[('user_id', 'count')], ascending=False)
    .head(25)
)

Unnamed: 0_level_0,program_id,path,primary_topic,subtopic,tertiary,date,time,user_id,cohort_id,ip,name,start_date,end_date,created_at
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,count,count,count,count,count,count,count,count,count
0,1.0,/,,,,1681,1681,1681,1681,1681,1681,1681,1681,1681
506,1.0,index.html,index.html,,,1011,1011,1011,1011,1011,1011,1011,1011,1011
544,1.0,javascript-i,javascript-i,,,736,736,736,736,736,736,736,736,736
483,1.0,html-css,html-css,,,542,542,542,542,542,542,542,542,542
676,1.0,spring,spring,,,501,501,501,501,501,501,501,501,501
533,1.0,java-iii,java-iii,,,479,479,479,479,479,479,479,479,479
520,1.0,java-ii,java-ii,,,454,454,454,454,454,454,454,454,454
512,1.0,java-i,java-i,,,444,444,444,444,444,444,444,444,444
567,1.0,javascript-ii,javascript-ii,,,429,429,429,429,429,429,429,429,429
64,1.0,appendix,appendix,,,409,409,409,409,409,409,409,409,409


**Takeaways**

Top 5 lessons visited by Full Stack Php program graduates:
- index.html
- javascript-i
- html-css
- spring
- java-iii


In [19]:
full_stack_php.name.value_counts()

Lassen        9587
Arches        8890
Olympic       4954
Kings         2845
Hampton       1712
Quincy        1237
Glacier        598
Joshua         302
Ike            253
Badlands        93
Franklin        72
Denali           4
Everglades       1
Name: name, dtype: int64

In [None]:
php_primary_count = (full_stack_php.groupby('name').primary_topic.value_counts())
php_primary_count

In [None]:
full_stack_php.groupby('name').primary_topic.describe()

In [None]:
# Name the count column explicitly rather than inheriting the Series name:
# newer pandas releases label value_counts() results "count", which would
# break the later php_primary['primary_topic'] lookup.
php_primary = php_primary_count.to_frame(name='primary_topic')
php_primary

In [None]:
php_primary['primary_topic']

#### Java

In [71]:
# Row counts per distinct page for this program, most-requested first.
(
    full_stack_java
    .groupby(['program_id', 'path', 'primary_topic', 'subtopic', 'tertiary'])
    .agg(['count'])
    .reset_index()
    .sort_values(by=[('user_id', 'count')], ascending=False)
    .head(25)
)

Unnamed: 0_level_0,program_id,path,primary_topic,subtopic,tertiary,date,time,user_id,cohort_id,ip,name,start_date,end_date,created_at
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,count,count,count,count,count,count,count,count,count
1,2.0,/,,,,12406,12406,12406,12406,12406,12406,12406,12406,12406
1333,2.0,javascript-i,javascript-i,,,4229,4229,4229,4229,4229,4229,4229,4229,4229
1543,2.0,spring,spring,,,3760,3760,3760,3760,3760,3760,3760,3760,3760
1509,2.0,search/search_index.json,search,search_index.json,,3562,3562,3562,3562,3562,3562,3562,3562,3562
1264,2.0,html-css,html-css,,,3136,3136,3136,3136,3136,3136,3136,3136,3136
1319,2.0,java-iii,java-iii,,,3058,3058,3058,3058,3058,3058,3058,3058,3058
1307,2.0,java-ii,java-ii,,,2985,2985,2985,2985,2985,2985,2985,2985,2985
1300,2.0,java-i,java-i,,,2679,2679,2679,2679,2679,2679,2679,2679,2679
702,2.0,appendix,appendix,,,2662,2662,2662,2662,2662,2662,2662,2662,2662
1362,2.0,javascript-ii,javascript-ii,,,2549,2549,2549,2549,2549,2549,2549,2549,2549


**Takeaways**

Top 5 lessons visited by Full Stack Java program graduates:
- javascript-i
- spring
- search/search_index.json
- html-css
- java-iii


In [70]:
full_stack_java.name

72          Niagara
73          Niagara
112       Pinnacles
113       Pinnacles
116       Pinnacles
            ...    
900218        Staff
900219        Staff
900220        Staff
900221        Staff
900222        Staff
Name: name, Length: 157786, dtype: object

In [None]:
java_primary_count = (full_stack_java.groupby('name').primary_topic.value_counts())
java_primary_count

In [None]:
pd.DataFrame(java_primary_count)

#### Data Science

In [20]:
data_science

Unnamed: 0,date,time,path,user_id,cohort_id,ip,name,start_date,end_date,created_at,program_id,primary_topic,subtopic,tertiary
443671,2020-01-31,11:05:04,/,476,34,136.50.49.145,Bayes,2019-08-19,2020-01-30,2019-08-20 14:38:55,3.0,,,
443672,2020-01-31,11:05:13,1-fundamentals/1.1-intro-to-data-science,476,34,136.50.49.145,Bayes,2019-08-19,2020-01-30,2019-08-20 14:38:55,3.0,1-fundamentals,1.1-intro-to-data-science,
443673,2020-01-31,11:05:13,1-fundamentals/modern-data-scientist.jpg,476,34,136.50.49.145,Bayes,2019-08-19,2020-01-30,2019-08-20 14:38:55,3.0,1-fundamentals,modern-data-scientist.jpg,
443674,2020-01-31,11:05:13,1-fundamentals/AI-ML-DL-timeline.jpg,476,34,136.50.49.145,Bayes,2019-08-19,2020-01-30,2019-08-20 14:38:55,3.0,1-fundamentals,AI-ML-DL-timeline.jpg,
443948,2020-01-31,14:44:59,/,476,34,136.50.49.145,Bayes,2019-08-19,2020-01-30,2019-08-20 14:38:55,3.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
900093,2021-04-21,15:20:12,classification/scale_features_or_not.svg,692,59,96.8.130.134,Darden,2020-07-13,2021-01-12,2020-07-13 18:32:19,3.0,classification,scale_features_or_not.svg,
900094,2021-04-21,15:20:12,classification/classical_programming_vs_machin...,692,59,96.8.130.134,Darden,2020-07-13,2021-01-12,2020-07-13 18:32:19,3.0,classification,classical_programming_vs_machine_learning.jpeg,
900095,2021-04-21,15:20:12,classification/overview,692,59,96.8.130.134,Darden,2020-07-13,2021-01-12,2020-07-13 18:32:19,3.0,classification,overview,
900096,2021-04-21,15:20:14,classification/project,692,59,96.8.130.134,Darden,2020-07-13,2021-01-12,2020-07-13 18:32:19,3.0,classification,project,


In [37]:
# Reassign instead of dropping in place: data_science is a filtered selection of
# after_grad, and errors='ignore' keeps this cell safe to run more than once.
data_science = data_science.drop(columns=['start_date', 'end_date', 'created_at'], errors='ignore')

In [66]:
# Row counts per distinct page for this program, most-requested first.
(
    data_science
    .groupby(['program_id', 'path', 'primary_topic', 'subtopic', 'tertiary'])
    .agg(['count'])
    .reset_index()
    .sort_values(by=[('user_id', 'count')], ascending=False)
    .head(25)
)

Unnamed: 0_level_0,program_id,path,primary_topic,subtopic,tertiary,date,time,user_id,cohort_id,ip,name
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,count,count,count,count,count,count
0,3.0,/,,,,1436,1436,1436,1436,1436,1436
371,3.0,search/search_index.json,search,search_index.json,,493,493,493,493,493,493
388,3.0,sql/mysql-overview,sql,mysql-overview,,275,275,275,275,275,275
252,3.0,classification/overview,classification,overview,,266,266,266,266,266,266
257,3.0,classification/scale_features_or_not.svg,classification,scale_features_or_not.svg,,219,219,219,219,219,219
194,3.0,anomaly-detection/AnomalyDetectionCartoon.jpeg,anomaly-detection,AnomalyDetectionCartoon.jpeg,,193,193,193,193,193,193
199,3.0,anomaly-detection/overview,anomaly-detection,overview,,191,191,191,191,191,191
284,3.0,fundamentals/AI-ML-DL-timeline.jpg,fundamentals,AI-ML-DL-timeline.jpg,,189,189,189,189,189,189
305,3.0,fundamentals/modern-data-scientist.jpg,fundamentals,modern-data-scientist.jpg,,187,187,187,187,187,187
302,3.0,fundamentals/intro-to-data-science,fundamentals,intro-to-data-science,,184,184,184,184,184,184


**Takeaways**

Top 5 lessons visited by Data Science program graduates:
- search/search_index.json
- sql/mysql-overview
- classification/overview
- classification/scale_features_or_not.svg
- anomaly-detection/AnomalyDetectionCartoon.jpeg


In [38]:
# Row counts per cohort and topic level for the Data Science program.
df1 = (
    data_science
    .groupby(['name', 'primary_topic', 'subtopic', 'tertiary'])
    .agg(['count'])
    .reset_index()
)

In [39]:
df1

Unnamed: 0_level_0,name,primary_topic,subtopic,tertiary,date,time,path,user_id,cohort_id,ip,program_id
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,count,count,count,count,count,count,count
0,Bayes,,,,491,491,491,491,491,491,491
1,Bayes,1-fundamentals,1.1-intro-to-data-science,,114,114,114,114,114,114,114
2,Bayes,1-fundamentals,1.2-data-science-pipeline,,9,9,9,9,9,9,9
3,Bayes,1-fundamentals,1.3-pipeline-demo,,4,4,4,4,4,4,4
4,Bayes,1-fundamentals,2.1-excel-overview,,3,3,3,3,3,3,3
...,...,...,...,...,...,...,...,...,...,...,...
872,Darden,timeseries,modeling-lesson1,,24,24,24,24,24,24,24
873,Darden,timeseries,overview,,11,11,11,11,11,11,11
874,Darden,timeseries,prep,,11,11,11,11,11,11,11
875,Darden,timeseries,project,,7,7,7,7,7,7,7


In [60]:
df1.sort_values(by=[('user_id', 'count')], ascending=False).head(25)

Unnamed: 0_level_0,name,primary_topic,subtopic,tertiary,date,time,path,user_id,cohort_id,ip,program_id
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,count,count,count,count,count,count,count
400,Curie,,,,564,564,564,564,564,564,564
0,Bayes,,,,491,491,491,491,491,491,491
685,Darden,,,,381,381,381,381,381,381,381
340,Bayes,search,search_index.json,,187,187,187,187,187,187,187
627,Curie,search,search_index.json,,157,157,157,157,157,157,157
824,Darden,search,search_index.json,,149,149,149,149,149,149,149
535,Curie,classification,overview,,136,136,136,136,136,136,136
641,Curie,sql,mysql-overview,,125,125,125,125,125,125,125
837,Darden,sql,mysql-overview,,123,123,123,123,123,123,123
15,Bayes,1-fundamentals,modern-data-scientist.jpg,,115,115,115,115,115,115,115


In [25]:
df1.columns

MultiIndex([(         'name',      ''),
            ('primary_topic',      ''),
            (     'subtopic',      ''),
            (     'tertiary',      ''),
            (         'date', 'count'),
            (         'time', 'count'),
            (         'path', 'count'),
            (      'user_id', 'count'),
            (    'cohort_id', 'count'),
            (           'ip', 'count'),
            (   'start_date', 'count'),
            (     'end_date', 'count'),
            (   'created_at', 'count'),
            (   'program_id', 'count')],
           )

In [41]:
df1['name'].value_counts()

Bayes     400
Curie     285
Darden    192
Name: name, dtype: int64

In [43]:
# One aggregated frame per Data Science cohort, taken from df1.
bayes, curie, darden = (
    df1.loc[df1['name'] == cohort] for cohort in ('Bayes', 'Curie', 'Darden')
)

In [None]:
# Raw (un-aggregated) log rows per Data Science cohort. These use their own names
# so they do not overwrite the aggregated bayes / curie / darden frames, which
# the following cells sort by the ('user_id', 'count') column.
bayes_raw = data_science.loc[data_science['name'] == 'Bayes']
curie_raw = data_science.loc[data_science['name'] == 'Curie']
darden_raw = data_science.loc[data_science['name'] == 'Darden']

In [55]:
bayes.sort_values(by=[('user_id', 'count')], ascending=False).head(25)

Unnamed: 0_level_0,name,primary_topic,subtopic,tertiary,date,time,path,user_id,cohort_id,ip,program_id
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,count,count,count,count,count,count,count
0,Bayes,,,,491,491,491,491,491,491,491
340,Bayes,search,search_index.json,,187,187,187,187,187,187,187
15,Bayes,1-fundamentals,modern-data-scientist.jpg,,115,115,115,115,115,115,115
13,Bayes,1-fundamentals,AI-ML-DL-timeline.jpg,,114,114,114,114,114,114,114
1,Bayes,1-fundamentals,1.1-intro-to-data-science,,114,114,114,114,114,114,114
126,Bayes,6-regression,1-overview,,77,77,77,77,77,77,77
21,Bayes,10-anomaly-detection,AnomalyDetectionCartoon.jpeg,,48,48,48,48,48,48,48
16,Bayes,10-anomaly-detection,1-overview,,48,48,48,48,48,48,48
66,Bayes,3-sql,1-mysql-overview,,40,40,40,40,40,40,40
138,Bayes,6-regression,7.0-model,,39,39,39,39,39,39,39


In [59]:
bayes.subtopic.value_counts()

None                                     22
cli                                      16
project                                  10
1-overview                               10
overview                                 10
                                         ..
4.3-correlation                           1
4.4-compare-group-membership              1
4.5-more-statistical-testing-examples     1
5-power-analysis                          1
working-with-time-series-data             1
Name: subtopic, Length: 284, dtype: int64

**Takeaways**

- Top 5 lessons Bayes graduates visited:
    - search	search_index.json
    - 1-fundamentals	modern-data-scientist.jpg
    - 1-fundamentals	AI-ML-DL-timeline.jpg
    - 1-fundamentals	1.1-intro-to-data-science
    - 6-regression	1-overview

In [56]:
curie.sort_values(by=[('user_id', 'count')], ascending=False).head(25)

Unnamed: 0_level_0,name,primary_topic,subtopic,tertiary,date,time,path,user_id,cohort_id,ip,program_id
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,count,count,count,count,count,count,count
400,Curie,,,,564,564,564,564,564,564,564
627,Curie,search,search_index.json,,157,157,157,157,157,157,157
535,Curie,classification,overview,,136,136,136,136,136,136,136
641,Curie,sql,mysql-overview,,125,125,125,125,125,125,125
555,Curie,fundamentals,AI-ML-DL-timeline.jpg,,101,101,101,101,101,101,101
573,Curie,fundamentals,modern-data-scientist.jpg,,100,100,100,100,100,100,100
539,Curie,classification,scale_features_or_not.svg,,96,96,96,96,96,96,96
571,Curie,fundamentals,intro-to-data-science,,96,96,96,96,96,96,96
489,Curie,anomaly-detection,AnomalyDetectionCartoon.jpeg,,74,74,74,74,74,74,74
493,Curie,anomaly-detection,overview,,72,72,72,72,72,72,72


**Takeaways**

- Top 5 lessons Curie graduates visited:
    - search	search_index.json
    - classification	overview
    - sql	mysql-overview
    - fundamentals	AI-ML-DL-timeline.jpg
    - fundamentals	modern-data-scientist.jpg	

In [57]:
darden.sort_values(by=[('user_id', 'count')], ascending=False).head(25)

Unnamed: 0_level_0,name,primary_topic,subtopic,tertiary,date,time,path,user_id,cohort_id,ip,program_id
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,count,count,count,count,count,count,count
685,Darden,,,,381,381,381,381,381,381,381
824,Darden,search,search_index.json,,149,149,149,149,149,149,149
837,Darden,sql,mysql-overview,,123,123,123,123,123,123,123
697,Darden,anomaly-detection,AnomalyDetectionCartoon.jpeg,,105,105,105,105,105,105,105
702,Darden,anomaly-detection,overview,,104,104,104,104,104,104,104
742,Darden,classification,overview,,95,95,95,95,95,95,95
746,Darden,classification,scale_features_or_not.svg,,95,95,95,95,95,95,95
764,Darden,fundamentals,AI-ML-DL-timeline.jpg,,64,64,64,64,64,64,64
782,Darden,fundamentals,modern-data-scientist.jpg,,63,63,63,63,63,63,63
780,Darden,fundamentals,intro-to-data-science,,63,63,63,63,63,63,63


**Takeaways**

- Top 5 lessons Darden graduates visited:
    - search	search_index.json
    - sql	mysql-overview
    - anomaly-detection	AnomalyDetectionCartoon.jpeg
    - anomaly-detection	overview
    - classification	overview	

#### Frontend

In [None]:
front_end

In [69]:
# Row counts per distinct page for this program, most-requested first.
(
    front_end
    .groupby(['program_id', 'path', 'primary_topic', 'subtopic', 'tertiary'])
    .agg(['count'])
    .reset_index()
    .sort_values(by=[('user_id', 'count')], ascending=False)
    .head(25)
)

Unnamed: 0_level_0,program_id,path,primary_topic,subtopic,tertiary,date,time,user_id,cohort_id,ip,name,start_date,end_date,created_at
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,count,count,count,count,count,count,count,count,count
1,4.0,content/html-css,content,html-css,,2,2,2,2,2,2,2,2,2
0,4.0,/,,,,1,1,1,1,1,1,1,1,1
2,4.0,content/html-css/gitbook/images/favicon.ico,content,html-css,gitbook,1,1,1,1,1,1,1,1,1
3,4.0,content/html-css/introduction.html,content,html-css,introduction.html,1,1,1,1,1,1,1,1,1


**Takeaways**

Front End program graduates have only 5 observations since graduating, which is not enough data to determine the top lessons visited for this program.

### Extra code

In [None]:
df[(df.date > df.end_date)].groupby(['name','program_id', 'primary_topic', 'subtopic', 'tertiary']).value_counts()

In [None]:
pd.DataFrame(df[(df.date > df.end_date)].groupby(['name','program_id', 'primary_topic', 'subtopic', 'tertiary']).value_counts())

In [None]:
#df[(df.date > df.end_date)].groupby(['program_id', 'primary_topic', 'subtopic', 'tertiary']).value_counts().plot.barh(figsize=(5,5))

### Observations from 2019 onward

In [74]:
(df.date >= '2019-01-01').value_counts()

True     675820
False    171509
Name: date, dtype: int64

In [76]:
# .copy() makes this an independent frame, so the flag columns assigned to it
# in later cells are not written onto a filtered selection of df.
after_2019 = df[df['date'] >= '2019-01-01'].copy()

In [88]:
after_2019.head()

Unnamed: 0,date,time,path,user_id,cohort_id,ip,name,start_date,end_date,created_at,program_id,primary_topic,subtopic,tertiary
175808,2019-01-01,07:45:45,java-iii/mvc,271,26,73.31.215.224,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-iii,mvc,
175809,2019-01-01,12:20:53,/,51,13,72.179.161.39,Kings,2016-05-23,2016-09-15,2016-06-14 19:52:26,1.0,,,
175810,2019-01-01,12:56:12,java-i,274,26,67.11.239.2,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-i,,
175811,2019-01-01,12:59:46,java-i/strings,274,26,67.11.239.2,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-i,strings,
175812,2019-01-01,15:25:49,java-i/methods,274,26,67.11.239.2,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-i,methods,


In [89]:
# Lookup frame of the distinct paths requested by non-Data-Science cohorts
# (program_id != 3) from 2019 onward.
web_dev_path = pd.DataFrame(
    {'path': after_2019.loc[after_2019['program_id'] != 3, 'path'].unique()}
)
web_dev_path

Unnamed: 0,path
0,java-iii/mvc
1,/
2,java-i
3,java-i/strings
4,java-i/methods
...,...
1901,css
1902,easley-python-assessment.html
1903,florence-python-assessment.html
1904,javascript-i/dom


In [90]:
# Lookup frame of the distinct paths requested by Data Science cohorts
# (program_id == 3) from 2019 onward.
ds_path = pd.DataFrame(
    {'path': after_2019.loc[after_2019['program_id'] == 3, 'path'].unique()}
)
ds_path

Unnamed: 0,path
0,/
1,3-sql/1-mysql-overview
2,2-storytelling/bad-charts
3,2-storytelling/misleading1_baseball.jpg
4,2-storytelling/misleading1_fox.jpg
...,...
677,clustering/hierarchical_circle.png!%5Bimage.pn...
678,individual-project/individual-project
679,classification/explore-old
680,florence-python-assessment.html


In [103]:
after_2019['ds_hit'] = after_2019['path'].isin(ds_path['path'])

In [104]:
after_2019['web_dev_hit'] = after_2019['path'].isin(web_dev_path['path'])

In [106]:
# A path belongs to both programs only when it is in BOTH path lists.
# Comparing the flags with == would also mark rows where neither list matched.
after_2019['both_programs'] = after_2019['ds_hit'] & after_2019['web_dev_hit']

In [107]:
after_2019

Unnamed: 0,date,time,path,user_id,cohort_id,ip,name,start_date,end_date,created_at,program_id,primary_topic,subtopic,tertiary,ds_hit,web_dev_hit,both_programs
175808,2019-01-01,07:45:45,java-iii/mvc,271,26,73.31.215.224,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-iii,mvc,,False,True,False
175809,2019-01-01,12:20:53,/,51,13,72.179.161.39,Kings,2016-05-23,2016-09-15,2016-06-14 19:52:26,1.0,,,,True,True,True
175810,2019-01-01,12:56:12,java-i,274,26,67.11.239.2,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-i,,,True,True,True
175811,2019-01-01,12:59:46,java-i/strings,274,26,67.11.239.2,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-i,strings,,False,True,False
175812,2019-01-01,15:25:49,java-i/methods,274,26,67.11.239.2,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-i,methods,,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
900218,2021-04-21,16:41:51,jquery/personal-site,64,28,71.150.217.33,Staff,2014-02-04,2014-02-04,2018-12-06 17:04:19,2.0,jquery,personal-site,,False,True,False
900219,2021-04-21,16:42:02,jquery/mapbox-api,64,28,71.150.217.33,Staff,2014-02-04,2014-02-04,2018-12-06 17:04:19,2.0,jquery,mapbox-api,,False,True,False
900220,2021-04-21,16:42:09,jquery/ajax/weather-map,64,28,71.150.217.33,Staff,2014-02-04,2014-02-04,2018-12-06 17:04:19,2.0,jquery,ajax,weather-map,False,True,False
900221,2021-04-21,16:44:37,anomaly-detection/discrete-probabilistic-methods,744,28,24.160.137.86,Staff,2014-02-04,2014-02-04,2018-12-06 17:04:19,2.0,anomaly-detection,discrete-probabilistic-methods,,True,True,True


In [None]:
after_2019[after_2019.both_programs == True]

In [None]:
after_2019[after_2019.path != '/']

In [115]:
after_2019[(after_2019.both_programs == True) & (after_2019.path != '/')]

Unnamed: 0,date,time,path,user_id,cohort_id,ip,name,start_date,end_date,created_at,program_id,primary_topic,subtopic,tertiary,ds_hit,web_dev_hit,both_programs
175810,2019-01-01,12:56:12,java-i,274,26,67.11.239.2,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-i,,,True,True,True
175814,2019-01-01,15:32:24,toc,301,27,72.181.106.116,Yosemite,2018-11-05,2019-04-03,2018-11-05 15:26:37,2.0,toc,,,True,True,True
175815,2019-01-01,15:32:26,javascript-ii,301,27,72.181.106.116,Yosemite,2018-11-05,2019-04-03,2018-11-05 15:26:37,2.0,javascript-ii,,,True,True,True
175816,2019-01-01,15:32:28,jquery,301,27,72.181.106.116,Yosemite,2018-11-05,2019-04-03,2018-11-05 15:26:37,2.0,jquery,,,True,True,True
175819,2019-01-01,16:38:34,java-iii,262,26,97.105.90.179,Xanadu,2018-09-17,2019-02-08,2018-09-17 19:09:51,2.0,java-iii,,,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
900214,2021-04-21,16:41:29,javascript-i,64,28,71.150.217.33,Staff,2014-02-04,2014-02-04,2018-12-06 17:04:19,2.0,javascript-i,,,True,True,True
900215,2021-04-21,16:41:31,javascript-ii,64,28,71.150.217.33,Staff,2014-02-04,2014-02-04,2018-12-06 17:04:19,2.0,javascript-ii,,,True,True,True
900216,2021-04-21,16:41:49,jquery,64,28,71.150.217.33,Staff,2014-02-04,2014-02-04,2018-12-06 17:04:19,2.0,jquery,,,True,True,True
900217,2021-04-21,16:41:51,javascript-i/bom-and-dom/dom,875,135,24.242.150.231,Marco,2021-01-25,2021-07-19,2021-01-20 21:31:11,2.0,javascript-i,bom-and-dom,dom,True,True,True


***Takeaway***

- Users continued to have access to both curricula through April 21, 2021.

#### Observations prior to 2019

In [125]:
# .copy() makes this an independent frame, so the flag columns assigned to it
# in later cells are not written onto a filtered selection of df.
before_2019 = df[df['date'] < '2019-01-01'].copy()

In [126]:
before_2019['ds_hit'] = before_2019['path'].isin(ds_path['path'])

In [127]:
before_2019['web_dev_hit'] = before_2019['path'].isin(web_dev_path['path'])

In [128]:
# A path belongs to both programs only when it is in BOTH path lists.
# Comparing the flags with == would also mark pre-2019 paths that appear in
# neither list (False == False), inflating the result.
before_2019['both_programs'] = before_2019['ds_hit'] & before_2019['web_dev_hit']

In [129]:
before_2019[(before_2019.both_programs == True) & (before_2019.path != '/')]

Unnamed: 0,date,time,path,user_id,cohort_id,ip,name,start_date,end_date,created_at,program_id,primary_topic,subtopic,tertiary,ds_hit,web_dev_hit,both_programs
1,2018-01-26,09:56:02,java-ii,1,8,97.105.19.61,Hampton,2015-09-22,2016-02-06,2016-06-14 19:52:26,1.0,java-ii,,,True,True,True
2,2018-01-26,09:56:05,java-ii/object-oriented-programming,1,8,97.105.19.61,Hampton,2015-09-22,2016-02-06,2016-06-14 19:52:26,1.0,java-ii,object-oriented-programming,,True,True,True
4,2018-01-26,09:56:24,javascript-i/conditionals,2,22,97.105.19.61,Teddy,2018-01-08,2018-05-17,2018-01-08 13:59:10,2.0,javascript-i,conditionals,,True,True,True
6,2018-01-26,09:56:46,javascript-i/conditionals,3,22,97.105.19.61,Teddy,2018-01-08,2018-05-17,2018-01-08 13:59:10,2.0,javascript-i,conditionals,,True,True,True
13,2018-01-26,10:00:39,javascript-i,6,22,97.105.19.61,Teddy,2018-01-08,2018-05-17,2018-01-08 13:59:10,2.0,javascript-i,,,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175794,2018-12-31,15:59:42,javascript-i/conditionals,128,23,69.232.100.94,Ulysses,2018-03-05,2018-07-19,2018-03-05 14:22:11,2.0,javascript-i,conditionals,,True,True,True
175801,2018-12-31,23:51:33,spring,289,27,98.6.94.51,Yosemite,2018-11-05,2019-04-03,2018-11-05 15:26:37,2.0,spring,,,True,True,True
175802,2018-12-31,23:51:45,java-i,289,27,98.6.94.51,Yosemite,2018-11-05,2019-04-03,2018-11-05 15:26:37,2.0,java-i,,,True,True,True
175804,2018-12-31,23:51:56,java-iii,289,27,98.6.94.51,Yosemite,2018-11-05,2019-04-03,2018-11-05 15:26:37,2.0,java-iii,,,True,True,True


***Takeaway***

- Users did have access to both curricula prior to 2019.