# Get Course Information

In [41]:
import requests
import lxml.etree as ET
import lxml.html
import cssselect
import yarl
import io
import csv

In [42]:
def csv_from_url(url, fieldnames=None):
    """Get a csv from a URL"""
    r = requests.get(url)
    reader = csv.DictReader(io.StringIO(r.text), fieldnames=fieldnames)
    return [dict(x) for x in reader]

In [59]:
class WikiEduDashboard:
    
    URL_BASE = "https://dashboard.wikiedu.org"
    
    def __init__(self):
        self._session = requests.Session()
        
    def _get_campaign_page(self):
        url = self.URL_BASE + '/campaigns'
        return self._session.get(url)
        
    def campaigns(self):
        r = self._get_campaign_page()
        html = lxml.html.fromstring(r.text)
        table = html.find('.//*[@id="js-campaigns"]/table')
        rows = table.findall('./tbody/tr')
        colnames = ['title', 'students', 'students_by_course', 'instructors_by_course', 'course_data', 'pages_edited']
        data = {}
        for row in rows:
            links = [self.URL_BASE + x.attrib['href'] for x in row.findall('./td/a')]
            name = links[0].split('/')[-2]
            data[name] = dict(zip(colnames, links))
        return data
    
    def get_campaign_overview(self, campaign):
        url = f'{self.URL_BASE}/campaigns/{campaign}/overview'
        r = self._session.get(url)
        html = lxml.html.fromstring(r.text)
        description_path = ".//*[@class=\"module campaign-description rails_editable\"]/div[2]"
        description = html.find(description_path).text_content().strip()
        name = html.find(".//h2[@class=\"title\"]").text_content().strip()
        return {'description': description, 'name': name}
    
    def get_course(self, slug):
        baseurl = f"{self.URL_BASE}/courses/{slug}"
        out = {}
        out['users'] = self._session.get(f'{baseurl}/users.json').json()
        out['course'] = self._session.get(f'{baseurl}/course.json').json()
        out['tags'] = self._session.get(f'{baseurl}/tags.json').json()
        out['timeline'] = self._session.get(f'{baseurl}/timeline.json').json()
        out['campaigns'] = self._session.get(f'{baseurl}/campaigns.json').json()
        return out

    def get_campaign(self, campaign):
        out = self.get_campaign_overview(campaign)
        out['students'] = csv_from_url(f"{self.URL_BASE}/campaigns/{campaign}/students.csv", ('student',))
        out['students_by_course'] = csv_from_url(f"{self.URL_BASE}/campaigns/{campaign}/students.csv?course=true", ('student', 'course'))
        out['instructors'] = csv_from_url(f"{self.URL_BASE}/campaigns/{campaign}/instructors.csv?course=true", ('instructor', 'course'))
        out['course_data'] = csv_from_url(f"{self.URL_BASE}/campaigns/{campaign}/courses.csv")
        out['pages_edited'] = csv_from_url(f"{self.URL_BASE}/campaigns/{campaign}/articles_csv.csv")
        return out

- `users.json`
- `campaigns.json`
- `course.json`
- `tags.json`
- `timeline.json`

In [60]:
wikiedu = WikiEduDashboard()

In [61]:
campaigns = wikiedu.campaigns()

In [62]:
for campaign in campaigns:
    print(campaign)
    campaigns[campaign] = wikiedu.get_campaign(campaign)

summer_2015
Student_Groups_Pilot


KeyboardInterrupt: 

In [None]:
import json
with open('campaigns.json', 'w') as f:
    json.dump(campaigns, f)

In [76]:
import os
import os.path
import json

outdir = "courses"
os.makedirs(outdir, exist_ok=True)
courses = {}
for _, campaign in campaigns.items():
    for coursedata in campaign['course_data']:
        print(coursedata)
        coursedata['course_slug']
        slug = coursedata['course_slug']
        print(slug)
        school, course = slug.split('/')
        filename = os.path.join(outdir, school, course + '.json')
        if os.path.exists(filename):
            print(f"{filename} exists")
            continue
        data = wikiedu.get_course(slug)
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'w') as f:
            print(f"Writing {slug} to {filename}")
            json.dump(data, f)
    

{'course_slug': 'North_Dakota_State_University/English_325:_Writing_in_the_Health_Professions_(Summer_2015)', 'title': 'English 325: Writing in the Health Professions', 'institution': 'North Dakota State University', 'term': 'Summer 2015', 'home_wiki': 'en.wikipedia.org', 'created_at': '2015-05-15 15:15:14 UTC', 'start_date': '2015-07-14 00:00:00 UTC', 'end_date': '2015-08-14 23:59:59 UTC', 'new_or_returning': 'first_time_instructor', 'editors': '14', 'new_editors': '12', 'articles_edited': '21', 'articles_created': '0', 'bytes_added': '7509', 'total_edits': '158', 'mainspace_edits': '55', 'article_talk_edits': '21', 'userspace_edits': '49', 'article_views': '415726', 'upload_count': '0', 'uploads_used_in_articles': '0', 'upload_usage_count_across_all_wikis': '0', 'training_completion_rate': '0.9285714285714286'}
North_Dakota_State_University/English_325:_Writing_in_the_Health_Professions_(Summer_2015)
courses/North_Dakota_State_University/English_325:_Writing_in_the_Health_Professions

TypeError: string indices must be integers

In [64]:
slug

'Amherst_College/Rotherwas_Fellows_(Summer_2015)'