upgrade Python to v3.10 (iss. #1406) (#1426)
* #1406 - spelling correction & clean-up

Cleaned up trailing spaces on a few lines.

* #1406 - upgrade to Python 3.10

Resolves CVE-2015-20107, a shell command injection vulnerability in Python's mailcap module.

* #1406 - upgrade `pandas` for Python 3.10

Upgrade `pandas` and related modules to work with Python 3.10.

* #1406 - fix all warehouse connections

Derive the PostgreSQL connection string for the data warehouse DB from Django's database settings, then create a single engine for all queries that use that DB (see the usage sketch after the db_util.py diff below).

* Create utility function for creating mysql and postgres engines; apply to views

* Remove other database connection prep

* Reuse create_sqlalchemy_engine in data_validation

* Remove unused variable

* Make a couple minor modifications to db_util

* Put one read_sql call on a single line

* Remove unused import

* Update numpy, pangres; change mypy version

* Remove type parameter, use Django ENGINE

* Revert change to validate_udw_vs_udp since it already created an engine

Co-authored-by: Sam Sciolla <ssciolla@umich.edu>
Co-authored-by: Code Hugger (Matthew Jones) <jonespm@umich.edu>
3 people committed Sep 14, 2022
1 parent 9fa492d commit aa091fc
Showing 8 changed files with 57 additions and 44 deletions.
31 changes: 28 additions & 3 deletions dashboard/common/db_util.py
@@ -1,19 +1,44 @@
# Some utility functions used by other classes in this project
import logging
from datetime import datetime
-from typing import Dict, List, TypedDict, Union
+from typing import Dict, List, Literal, TypedDict, Union
+from urllib.parse import quote_plus

-from dateutil.parser import parse
import django
+from sqlalchemy import create_engine
+from sqlalchemy.engine import Engine
+from dateutil.parser import parse
+from django.conf import settings
+from django.contrib.auth.models import User as DjangoUser
from django_cron.models import CronJobLog

from dashboard.models import Course, User

-from django.contrib.auth.models import User as DjangoUser

logger = logging.getLogger(__name__)

+BACKENDS_PATH = 'django.db.backends.'
+
+
+class DjangoDBParams(TypedDict):
+    ENGINE: Literal['django.db.backends.mysql', 'django.db.backends.postgresql']
+    NAME: str
+    USER: str
+    PASSWORD: str
+    HOST: str
+    PORT: int
+
+
+def create_sqlalchemy_engine(db_params: DjangoDBParams) -> Engine:
+    new_db_params: DjangoDBParams = db_params.copy()
+    new_db_params['PASSWORD'] = quote_plus(db_params['PASSWORD'])
+
+    core_string = '{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(**new_db_params)
+    if new_db_params['ENGINE'] == (BACKENDS_PATH + 'mysql'):
+        return create_engine(f'mysql+mysqldb://{core_string}?charset=utf8mb4')
+    else:
+        return create_engine('postgresql://' + core_string)


def canvas_id_to_incremented_id(canvas_id):
    try:
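For reference, here is a minimal usage sketch of the new helper, mirroring how cron.py and views.py call it below (it assumes Django settings that define a MySQL 'default' database and a PostgreSQL 'DATA_WAREHOUSE' database):

from django.conf import settings

from dashboard.common.db_util import create_sqlalchemy_engine

# MySQL engine for the application database
engine = create_sqlalchemy_engine(settings.DATABASES['default'])
# PostgreSQL engine for the data warehouse
data_warehouse_engine = create_sqlalchemy_engine(settings.DATABASES['DATA_WAREHOUSE'])

Because quote_plus percent-encodes reserved characters (for example, '@' becomes '%40'), passwords containing URL delimiters cannot corrupt the generated connection string.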
29 changes: 8 additions & 21 deletions dashboard/cron.py
@@ -2,7 +2,6 @@
import logging
from collections import namedtuple
from typing import Any, Dict, List, Union
-from urllib.parse import quote_plus

import hjson
import pandas as pd
@@ -14,7 +13,7 @@
from django.db.models import QuerySet
from django_cron import CronJobBase, Schedule
from google.cloud import bigquery
-from sqlalchemy import create_engine, types
+from sqlalchemy import types
from sqlalchemy.engine import ResultProxy

from dashboard.common import db_util, utils
@@ -23,20 +22,8 @@

logger = logging.getLogger(__name__)

-db_name = settings.DATABASES['default']['NAME']
-db_user = settings.DATABASES['default']['USER']
-db_password = settings.DATABASES['default']['PASSWORD']
-db_host = settings.DATABASES['default']['HOST']
-db_port = settings.DATABASES['default']['PORT']
-logger.debug("db-name:" + db_name)
-logger.debug("db-user:" + db_user)
-
-engine = create_engine("mysql+mysqldb://{user}:{password}@{host}:{port}/{db}?charset=utf8mb4"
-                       .format(db=db_name, # your mysql database name
-                               user=db_user, # your mysql user for the database
-                               password=quote_plus(db_password), # password for user
-                               host=db_host,
-                               port=db_port))
+engine = db_util.create_sqlalchemy_engine(settings.DATABASES['default'])
+data_warehouse_engine = db_util.create_sqlalchemy_engine(settings.DATABASES['DATA_WAREHOUSE'])

# Set up queries array from configuration file
CRON_QUERY_FILE = settings.CRON_QUERY_FILE
@@ -61,7 +48,7 @@ def split_list(a_list: list, size: int = 20):
def util_function(sql_string, mysql_table, param_object=None, table_identifier=None):
    logger.debug(f'sql={sql_string}')
    logger.debug(f'table={mysql_table} param_object={param_object} table_identifier={table_identifier}')
-    df = pd.read_sql(sql_string, conns['DATA_WAREHOUSE'], params=param_object)
+    df = pd.read_sql(sql_string, data_warehouse_engine, params=param_object)

    # drop duplicates
    df = df.drop_duplicates(keep='first')
@@ -137,7 +124,7 @@ def verify_course_ids(self):
logger.debug("in checking course")
supported_courses = Course.objects.get_supported_courses()
course_ids = [str(x) for x in supported_courses.values_list('id', flat=True)]
courses_data = pd.read_sql(queries['course'], conns['DATA_WAREHOUSE'], params={'course_ids': tuple(course_ids)})
courses_data = pd.read_sql(queries['course'], data_warehouse_engine, params={'course_ids': tuple(course_ids)})
# error out when course id is invalid, otherwise add DataFrame to list
for course_id, data_last_updated in supported_courses:
if course_id not in list(courses_data['id']):
@@ -211,7 +198,7 @@ def update_canvas_resource(self):
        # Select all the files for these courses
        # convert int array to str array
        df_attach = pd.read_sql(queries['resource'],
-                               conns['DATA_WAREHOUSE'],
+                               data_warehouse_engine,
                                params={'course_ids': tuple(self.valid_locked_course_ids)})
        logger.debug(df_attach)
        # Update these back again based on the dataframe
@@ -416,7 +403,7 @@ def update_resource_access(self):
        # First, update resource table
        try:
            dtype = {'resource_id': types.VARCHAR(255)}
-            pangres.upsert(engine=engine, df=resource_df,
+            pangres.upsert(con=engine, df=resource_df,
                           table_name='resource', if_row_exists='update',
                           create_schema=False, add_new_columns=False,
                           dtype=dtype)
@@ -530,7 +517,7 @@ def update_term(self) -> str:

        term_sql: str = queries['term']
        logger.debug(term_sql)
-        warehouse_term_df: pd.DataFrame = pd.read_sql(term_sql, conns['DATA_WAREHOUSE'])
+        warehouse_term_df: pd.DataFrame = pd.read_sql(term_sql, data_warehouse_engine)

        existing_terms_ids: List[int] = [term.id for term in list(AcademicTerms.objects.all())]
        new_term_ids: List[int] = [int(id) for id in warehouse_term_df['id'].to_list() if id not in existing_terms_ids]
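A note on the upsert change above: pangres 4.x renamed the engine keyword of upsert to con, which is why requirements.txt below moves from pangres 2.3.1 to 4.1.2. A sketch of the new call shape, with an illustrative DataFrame (pangres uses the frame's named index as the upsert key):

import pandas as pd
import pangres
from sqlalchemy import types

# Illustrative frame; the named index column is the upsert key.
resource_df = pd.DataFrame(
    {'resource_id': ['a1b2'], 'name': ['Syllabus']}
).set_index('resource_id')

pangres.upsert(con=engine, df=resource_df,
               table_name='resource', if_row_exists='update',
               create_schema=False, add_new_columns=False,
               dtype={'resource_id': types.VARCHAR(255)})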
19 changes: 10 additions & 9 deletions dashboard/views.py
@@ -11,7 +11,6 @@
from django.conf import settings
from django.contrib import auth
from django.core.exceptions import ObjectDoesNotExist
-from django.db import connection as conn
from django.forms.models import model_to_dict
from django.http import HttpResponse, HttpResponseForbidden, JsonResponse
from django.shortcuts import redirect, render
@@ -20,7 +19,7 @@
from rules.contrib.views import permission_required, objectgetter

from dashboard.common import utils
-from dashboard.common.db_util import canvas_id_to_incremented_id
+from dashboard.common.db_util import canvas_id_to_incremented_id, create_sqlalchemy_engine
from dashboard.event_logs_types.event_logs_types import EventLogTypes
from dashboard.models import Course, CourseViewOption, Resource, UserDefaultSelection, User
from dashboard.settings import COURSES_ENABLED, RESOURCE_VALUES, RESOURCE_VALUES_MAP, \
@@ -44,6 +43,8 @@

BinningGrade = namedtuple('BinningGrade', ['value', 'index', 'binning_all'])

+app_engine = create_sqlalchemy_engine(settings.DATABASES['default'])
+

def gpa_map(grade):
    if grade is None:
@@ -285,7 +286,7 @@ def resource_access_within_week(request, course_id=0):
    elif (grade == GRADE_C):
        total_number_student_sql += " and current_grade >= 70 and current_grade < 80"

-    total_number_student_df = pd.read_sql(total_number_student_sql, conn, params={
+    total_number_student_df = pd.read_sql(total_number_student_sql, app_engine, params={
        "course_id": course_id,
        "enrollment_type": "StudentEnrollment"
    })
@@ -323,7 +324,7 @@ def resource_access_within_week(request, course_id=0):
    endTimeString = end.strftime('%Y%m%d') + "000000"
    logger.debug(sqlString)
    logger.debug("start time=" + startTimeString + " end_time=" + endTimeString)
-    df = pd.read_sql(sqlString, conn, params={
+    df = pd.read_sql(sqlString, app_engine, params={
        "start_time": startTimeString,
        "end_time": endTimeString,
        "course_id": course_id,
@@ -387,7 +388,7 @@ def resource_access_within_week(request, course_id=0):
    logger.debug(selfSqlString)
    logger.debug("current_user=" + current_user)

-    selfDf= pd.read_sql(selfSqlString, conn, params={"current_user":current_user, "course_id": course_id})
+    selfDf= pd.read_sql(selfSqlString, app_engine, params={"current_user":current_user, "course_id": course_id})
    output_df = output_df.join(selfDf.set_index('resource_id_type'), on=['resource_id_type'], how='left')
    output_df["total_percent"] = output_df.apply(lambda row: row[GRADE_A] + row[GRADE_B] + row[GRADE_C] + row[GRADE_LOW] + row.NO_GRADE, axis=1)
@@ -448,7 +449,7 @@ def grade_distribution(request, course_id=0):
    (select current_grade from user where sis_name=%(current_user)s and course_id=%(course_id)s) as current_user_grade
    from user where course_id=%(course_id)s and enrollment_type=%(enrollment_type)s
    """
-    df = pd.read_sql(grade_score_sql, conn, params={
+    df = pd.read_sql(grade_score_sql, app_engine, params={
        'current_user': current_user,
        'course_id': course_id,
        'enrollment_type': 'StudentEnrollment'
@@ -663,7 +664,7 @@ def get_course_assignments(course_id):
    (select distinct assignment_id,avg_score from submission where course_id=%(course_id)s) as sub on sub.assignment_id = assign.assignment_id
    """

-    assignments_in_course = pd.read_sql(sql,conn,params={'course_id': course_id}, parse_dates={'due_date': '%Y-%m-%d'})
+    assignments_in_course = pd.read_sql(sql, app_engine, params={'course_id': course_id}, parse_dates={'due_date': '%Y-%m-%d'})
    # No assignments found in the course
    if assignments_in_course.empty or (assignments_in_course['assignment_id'] == 0).all():
        logger.info('The course %s don\'t seems to have assignment data' % course_id)
@@ -697,7 +698,7 @@ def get_user_assignment_submission(current_user,assignments_in_course_df, course_id):
def get_user_assignment_submission(current_user,assignments_in_course_df, course_id):
sql = "select assignment_id, submitted_at, score, graded_date from submission where " \
"user_id=(select user_id from user where sis_name = %(current_user)s and course_id = %(course_id)s ) and course_id = %(course_id)s"
assignment_submissions = pd.read_sql(sql, conn, params={'course_id': course_id, "current_user": current_user})
assignment_submissions = pd.read_sql(sql, app_engine, params={'course_id': course_id, "current_user": current_user})
if assignment_submissions.empty:
logger.info('The user %s seems to be a not student in the course.' % current_user)
# manually adding the columns for display in UI
@@ -772,7 +773,7 @@ def find_current_week(row):

def is_weight_considered(course_id):
url = "select consider_weight from assignment_weight_consideration where course_id=%(course_id)s"
df = pd.read_sql(url, conn, params={"course_id": course_id})
df = pd.read_sql(url, app_engine, params={"course_id": course_id})
value = df['consider_weight'].iloc[0]
return value

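For context on the conn-to-app_engine swap throughout this file: pd.read_sql officially supports a SQLAlchemy connectable (engine or connection), a database URI string, or a sqlite3 connection, and newer pandas releases warn when handed other DBAPI2 objects such as Django's raw connection. A minimal sketch of the pattern the views now use (the query and values are illustrative):

import pandas as pd

sql = 'select user_id, current_grade from user where course_id = %(course_id)s'
df = pd.read_sql(sql, app_engine, params={'course_id': 12345})

The %(name)s placeholders match the pyformat parameter style of the underlying mysqlclient driver.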
2 changes: 1 addition & 1 deletion dockerfiles/Dockerfile
@@ -30,7 +30,7 @@ RUN npm prune --production && \
find node_modules -type d -name "examples" -print0 | xargs -0 rm -rf

# FROM directive instructing base image to build upon
-FROM python:3.8-slim AS app
+FROM python:3.10-slim AS app

# EXPOSE port 5000 to allow communication to/from server
EXPOSE 5000
2 changes: 1 addition & 1 deletion dockerfiles/Dockerfile.openshift
@@ -29,7 +29,7 @@ RUN npm prune --production && \
find node_modules -type d -name "examples" -print0 | xargs -0 rm -rf

# FROM directive instructing base image to build upon
-FROM docker-registry.default.svc:5000/openshift/python:3.8-slim AS app
+FROM docker-registry.default.svc:5000/openshift/python:3.10-slim AS app

# EXPOSE port 5000 to allow communication to/from server
EXPOSE 5000
2 changes: 1 addition & 1 deletion mypy.ini
@@ -1,2 +1,2 @@
[mypy]
-python_version = 3.8
+python_version = 3.10
8 changes: 4 additions & 4 deletions requirements.txt
@@ -25,14 +25,14 @@ django-filter==2.4.0
rules==3.0

# These should be okay to update minors
-numpy==1.22.0
-pandas==1.3.1
-pangres==2.3.1
+numpy==1.23.3
+pandas==1.4.4
+pangres==4.1.2

SQLAlchemy==1.4.22
psycopg2==2.9.1
mysqlclient==2.0.3
-google-cloud-bigquery[pandas]==2.24.0
+google-cloud-bigquery[pandas]==3.3.2

debugpy==1.4.1
jsonschema==3.2.0
8 changes: 4 additions & 4 deletions start.sh
@@ -1,6 +1,6 @@
-#!/bin/bash
+#!/bin/bash

-# Case insenstive match
+# Case insensitive match
shopt -s nocaseglob

if [ -z "${ENV_FILE}" ]; then
@@ -48,7 +48,7 @@ else
fi

echo "Waiting for DB"
-while ! nc -z "${MYSQL_HOST}" "${MYSQL_PORT}"; do
+while ! nc -z "${MYSQL_HOST}" "${MYSQL_PORT}"; do
sleep 1 # wait 1 second before check again
done

@@ -89,7 +89,7 @@ if [ "${IS_CRON_POD:-"false"}" == "false" ]; then
--workers="${GUNICORN_WORKERS}" \
--timeout="${GUNICORN_TIMEOUT}" \
${GUNICORN_RELOAD}

else
if [ -z "${CRONTAB_SCHEDULE}" ]; then
echo "CRONTAB_SCHEDULE environment variable not set, crontab cannot be started. Please set this to a crontab acceptable format."

