# Setup
Hello.  Connecting to Oracle in Python is a trip and a half.  Connecting to SaaS or to Bb Data is supposedly easier - and I'm working to ensure that importing things works easily for everyone.

In [15]:
from sqlalchemy import create_engine
from os import getenv as ge
from dotenv import load_dotenv
import pandas as pd

load_dotenv()
bbuser, bbpass, bbhost = ge('bbuser'), ge('bbpass'), ge('bbhost')
con_string = f'oracle+cx_oracle://{bbuser}:{bbpass}@{bbhost}'
engine = create_engine(con_string, max_identifier_length=128)

### Snowflake setup
Connecting to snowflake is much easier.  Everything in this presentation is focused on Oracle PL/SQL, but if you are on SaaS - many examples can be translated and run against snowflake or postgres.  Here's how to connect via Snowflake.

In [1]:
from snowflake.sqlalchemy import URL
snow_engine = create_engine(URL(
    account = ge('snow_account'),
    user = ge('snow_user'),
    password = ge('snow_pass'),
    database = ge('snow_db'),
    warehouse = ge('snow_wh')
))


query = """
select
    year(lt.start_date) as term_year,
    ifnull(lt.name, 'Unknown Term') as term,
    replace(ifnull(h2.name, 'No Parent'),'NoName', 'Institution') as hierarchy_parent_node,
    ifnull(h1.name, 'No Node') as hierarchy_node,
    count(distinct lc.id) as course_count
from cdm_lms.course lc
inner join cdm_lms.institution_hierarchy_course ihc
    on lc.id = ihc.course_id
    and ihc.primary_ind = 1
    and ihc.row_deleted_time is null
left join cdm_lms.institution_hierarchy h1
    on ihc.institution_hierarchy_id = h1.id
left join cdm_lms.institution_hierarchy h2
    on h1.institution_hierarchy_parent_id = h2.id 
left join cdm_lms.term lt
    on lt.id = lc.term_id
group by
    year(lt.start_date),
    h1.name,
    h2.name,
    lt.name
order by
    year(lt.start_date),
    lt.name,
    h1.name
"""
example = pd.read_sql(query, con=snow_engine)                    

NameError: name 'create_engine' is not defined

In [17]:
example.head()

Unnamed: 0,term_year,term,hierarchy_parent_node,hierarchy_node,course_count
0,2014.0,Fall 2014,Institution,Beta,10
1,2014.0,Fall 2014,Institution,Chemical Engineering,15
2,2014.0,Fall 2014,Institution,English,27
3,2014.0,Fall 2014,Institution,First Year,26
4,2014.0,Fall 2014,Institution,Honors,17


### Simple example
This selects 5 users and imports into a pandas dataframe

In [2]:
query = """
SELECT *
FROM BB_BB60.USERS
FETCH FIRST 5 ROWS ONLY
"""

users = pd.read_sql_query(query, con=engine)

In [3]:
users.head()

Unnamed: 0,pk1,city,data_src_pk1,system_role,sos_id_pk2,dtcreated,dtmodified,row_status,batch_uid,user_id,...,locale,card_number,settings,othername,suffix,cld_id,cld_avatar_url,uuid,calendar_type,week_first_day
0,624298,,14,N,1,2013-10-17 03:01:27,2015-08-09 02:00:07,0,2794944,d891p293,...,,,,,,,,68b5fcb8088c47f483591cacf9f7813e,,
1,624306,,14,N,1,2013-10-17 03:01:39,2015-08-09 02:00:07,0,2777941,m691m903,...,,,,,,,,f1d6bbc1dbb94cdc9aaf905007dc8e46,,
2,374441,Lawrence,14,N,1,2003-01-03 22:00:59,2020-07-10 01:08:16,0,1085372,stransue,...,en_US,,,,,,,ffb1757c925f494f96d1823302b689db,,
3,374443,Lawrence,14,N,1,2003-01-03 22:00:59,2020-07-10 01:05:24,0,1003185,jtraxler,...,,,,,,,,3d6eb939bc3349ec810ed4259ad4bb79,,
4,374444,Lawrence,14,N,1,2003-01-03 22:00:59,2019-08-12 17:33:03,0,1013679,pboyle,...,,,,,,,,a04fd36288194b5fb894e3dbbfb40179,,


# From files example

In [53]:
query = ''
with open('./queryfiles/no_access.sql','r') as f:
    query = f.read()

In [54]:
pprint(query)

("SELECT SUBSTR(REGEXP_SUBSTR(cm.COURSE_NAME, '-[A-Z&]+'),2) COURSE_UNIT,\n"
 "       SUBSTR(REGEXP_SUBSTR(cm.COURSE_NAME, ' [A-Z]{3}$'),2) COURSE_TYPE,\n"
 "       REPLACE(cm.COURSE_NAME,',',' ') COURSE_NAME,\n"
 '       cm.COURSE_ID,\n'
 '       cm.AVAILABLE_IND,\n'
 '       MAX(cu.LAST_ACCESS_DATE) last_access,\n'
 '       SUM(students.students)/COUNT(students.students) student_count,\n'
 "       listagg(u.email, ';') WITHIN GROUP (order by u.email) emails,\n"
 "       listagg(u.firstname || ' ' || u.LASTNAME, ';') WITHIN GROUP (order by "
 'u.email) names\n'
 '\n'
 'FROM BB_BB60.USERS u inner join BB_BB60.COURSE_USERS cu on u.pk1 = '
 'cu.USERS_PK1\n'
 'inner join BB_BB60.COURSE_MAIN cm on cu.CRSMAIN_PK1 = cm.pk1\n'
 'left join (select crsmain_pk1, count(pk1) students\n'
 '    from bb_bb60.COURSE_USERS\n'
 "    where role='S'\n"
 '    and row_status = 0\n'
 '    group by crsmain_pk1) students on students.crsmain_pk1 = cm.pk1\n'
 '\n'
 "WHERE cm.COURSE_NAME LIKE '2020' || :season ||

# Creating Pandas DataFrames from SQL
Pandas dataframes are the standard for working with data in python.  They can read in a variety of data files.  In this case, we are reading in SQL and querying the database.  Most of these queries seek to get a LOT of information, that can be paired down after the fact.  The same can be acheived directly by editing the query - but pandas helps us combine information from other sources, and write to files to a variety of other formats.

In [55]:
access_df = pd.read_sql(query, 
                        params={'season':'Summer'}, 
                        con=engine, 
                        parse_dates=['last_access'])

In [56]:
access_df.head()

Unnamed: 0,course_unit,course_type,course_name,course_id,available_ind,last_access,student_count,emails,names
0,PHSX,LBN,2020Summer-PHSX 114 College Physics I LBN,4206-71277,Y,2020-07-10 13:37:50,116,ColeLindsey@ku.edu;am.morgan@ku.edu;b047m507@k...,Cole Lindsey;Aaron Morgan;Brendon Madison;Carl...
1,PHSX,LEC,2020Summer-PHSX 114 College Physics I LEC,4206-71276,N,2020-07-09 20:33:36,107,a665l723@ku.edu;shark@ku.edu;slegres@ku.edu;su...,Ann Lindbloom;Christopher Fischer;Sarah LeGres...
2,PHAR,FLD,2020Summer-PHAR 550 Introductory Pharmacy Prac...,4206-72717,N,NaT,104,jheidric@ku.edu,Joseph Heidrick
3,ECON,LEC,2020Summer-ECON 142 Principles of Microeconomi...,4206-70484,Y,2020-07-07 18:58:27,103,j_lugovskyy@ku.edu;joseoyeon@ku.edu,Josephine Lugovskyy;SeoYeon Jo
4,PHSX,LEC,2020Summer-PHSX 212 General Physics II LEC,4206-74307,Y,2020-07-10 13:35:30,101,a665l723@ku.edu;j743d550@ku.edu;sandhyaravikum...,Ann Lindbloom;Jennifer Delgado;Sandhya Ravikum...


In [57]:
no_access_df = access_df[(access_df['course_type']=='LEC') & 
                         (access_df['available_ind']=='Y') & 
                         (access_df['last_access'] < pd.to_datetime("2020-07-06"))]

In [58]:
no_access_df.head()

Unnamed: 0,course_unit,course_type,course_name,course_id,available_ind,last_access,student_count,emails,names
25,BIOL,LEC,2020Summer-BIOL 100 Principles of Biology LEC,4206-70141,Y,2020-07-03 15:59:16,52,tmarria@ku.edu,Tara Marriage
63,ARCH,LEC,2020Summer-ARCH 531 Environmental Systems II LEC,4206-77354,Y,2020-06-01 10:40:49,38,jdchang@ku.edu,Jae Chang
64,SPED,LEC,2020Summer-SPED 501 American Sign Language I (...,4206-72962,Y,2020-06-26 15:02:01,37,rmalcolm@ku.edu,Ron Malcolm
65,ARCH,LEC,2020Summer-ARCH 530 Environmental Systems I LEC,4206-76825,Y,2020-06-12 17:06:43,37,jdchang@ku.edu;pittman.jason.k@ku.edu,Jae Chang;Jason Pittman
67,LAW,LEC,2020Summer-LAW 908 Evidence LEC,4206-70928,Y,2020-06-18 13:37:15,37,sleben@ku.edu,Steve Leben


# Creating Excel Files from Pandas

In [11]:
from datetime import datetime
date = datetime.today().strftime("%m-%d")
no_access_df.to_excel(f'{date}-NoAccess.xlsx')

# Putting those excel files into onedrive

### Setup MS Connection

In [12]:
client_id = ge('client_id')
authority_id = ge('authority_id')
client_credential = ge('client_credential')
drive_id = ge('drive_id')
item_id = ge('item_id')

In [14]:
from msal import ConfidentialClientApplication
import requests

app = ConfidentialClientApplication(client_id=client_id,
                                    authority=f"https://login.microsoftonline.com/{authority_id}",
                                    client_credential=client_credential)

result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
token = result['access_token']
s = requests.Session()
http_headers = {'Authorization': 'Bearer ' + result['access_token'],
                'Accept': 'application/json',
                'Content-Type': 'application/json'}
s.headers.update(http_headers)
base_url = 'https://graph.microsoft.com/beta'

In [15]:
r = s.put(f"{base_url}/drives/{drive_id}/items/{item_id}:/{date}-NoAccess.xlsx:/content", 
            data=open(f'{date}-NoAccess.xlsx','rb'))

In [16]:
r.json()

{'@odata.context': "https://graph.microsoft.com/beta/$metadata#drives('b%212gKIaGIsfU-pWCgyfCEltNbxnxaD5cVLmJ4HlMYYp737N6aHJRFRRoUKxHMP1jfc')/items/$entity", '@microsoft.graph.downloadUrl': 'https://kansas-my.sharepoint.com/personal/m500d520_home_ku_edu/_layouts/15/download.aspx?UniqueId=173d8f55-de24-4f40-8f7a-a19fed380811&Translate=false&tempauth=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.eyJhdWQiOiIwMDAwMDAwMy0wMDAwLTBmZjEtY2UwMC0wMDAwMDAwMDAwMDAva2Fuc2FzLW15LnNoYXJlcG9pbnQuY29tQDNjMTc2NTM2LWFmZTYtNDNmNS1iOTY2LTM2ZmVhYmJlM2MxYSIsImlzcyI6IjAwMDAwMDAzLTAwMDAtMGZmMS1jZTAwLTAwMDAwMDAwMDAwMCIsIm5iZiI6IjE1OTQ0MDYzNjkiLCJleHAiOiIxNTk0NDA5OTY5IiwiZW5kcG9pbnR1cmwiOiJ6L2dlZmw1cExqLzJPQ2I0RFh6Z21YR012cmN0WndZVGl5RHAvNUp0RVBzPSIsImVuZHBvaW50dXJsTGVuZ3RoIjoiMTUwIiwiaXNsb29wYmFjayI6IlRydWUiLCJjaWQiOiJZalZoWkRFMVpqUXROREV4T1MwMFkyUmxMVGhtWWpBdE1tSmhPVEZpTW1RMFlqQm0iLCJ2ZXIiOiJoYXNoZWRwcm9vZnRva2VuIiwic2l0ZWlkIjoiTmpnNE9EQXlaR0V0TW1NMk1pMDBaamRrTFdFNU5UZ3RNamd6TWpkak1qRXlOV0kwIiwiYXBwX2Rpc3BsYXluYW1lIjoiS