In [None]:
# Main Python notebook for running the automated survey reports project.
# Part 1: Data collection, cleaning, filtering, and pre-processing.
# Part 2: Report generation for every category of processed data.

## Part 1
### Data Extraction and Pre-processing


In [None]:
# Importing necessary libraries

from scripts.data_extraction import download_survey
from scripts.data_extraction import get_data_after_recorded_date
from scripts.data_extraction import merge_datasets
from scripts.data_extraction import process_data

import os

In [None]:
# Read the Qualtrics API settings from the process environment.
#
# NOTE: os.getenv() only looks at variables that are already present in the
# environment; it does not parse a .env file on its own. The .env file must
# therefore be loaded before this cell runs (by the shell, or possibly by the
# project modules imported above — TODO confirm where that happens).

_required_env_vars = (
    "API_TOKEN",
    "INTERN_SURVEY_ID",
    "SUPERVISOR_SURVEY_ID",
    "DATACENTER_ID",
)
_env_values = {name: os.getenv(name) for name in _required_env_vars}

api_token = _env_values["API_TOKEN"]
intern_survey_id = _env_values["INTERN_SURVEY_ID"]
supervisor_survey_id = _env_values["SUPERVISOR_SURVEY_ID"]
datacenter_id = _env_values["DATACENTER_ID"]

# Surface missing configuration here instead of letting None (or an empty
# string) flow silently into the download calls further down.
_missing_env_vars = [name for name, value in _env_values.items() if not value]
if _missing_env_vars:
    print(
        "WARNING: the following environment variables are not set: "
        + ", ".join(_missing_env_vars)
    )

In [None]:
# Working folders for the data pipeline. Every path is relative to the
# current working directory (no absolute or parent-directory lookup is done).
_surveys_root = "MySurveys"

output_dir = os.path.join(_surveys_root, "intermediate_results")  # intermediate results
p_output_dir = os.path.join(_surveys_root, "processed_data")      # processed data
r_output_dir = os.path.join(_surveys_root, "raw_data")            # raw data

# exist_ok=True keeps this cell safe to re-run when the folders already exist.
for _pipeline_dir in (output_dir, p_output_dir, r_output_dir):
    os.makedirs(_pipeline_dir, exist_ok=True)



In [None]:
# Extract the raw survey responses from the Qualtrics API.
# Both calls share the same data center ID and API token; only the survey ID
# differs. The return value is not captured, so download_survey presumably
# saves the export to disk itself — TODO confirm the destination in
# scripts/data_extraction.

# Download the intern survey responses.
download_survey(intern_survey_id, datacenter_id, api_token)

# Download the supervisor survey responses.
download_survey(supervisor_survey_id, datacenter_id, api_token)

In [None]:
# Keep only the intern and supervisor responses recorded from a user-supplied
# date (YYYY-MM-DD) onwards. The function takes no arguments, so it presumably
# prompts for the date and locates the downloaded data itself — TODO confirm
# in scripts/data_extraction.
intern_data, supervisor_data = get_data_after_recorded_date()

In [None]:
# Merge the filtered intern and supervisor datasets on their key columns
# (the key columns are chosen inside merge_datasets).
merged_data = merge_datasets(intern_data, supervisor_data)

In [None]:
# Final data pre-processing step. process_data is expected to split the merged
# data into the following categories:
# 1. Complete records (data from both the student and the supervisor).
# 2. Student-only records.
# 3. Supervisor-only records.
# 4. Joined records where the supervisor name does not match for the same
#    student, concatenated together.
# The return value is not captured; Part 2 reads CSV files from
# MySurveys/processed_data, so the results are presumably written there —
# TODO confirm in scripts/data_extraction.

process_data(merged_data)

## Part 2
### Report generation

In [None]:
# importing necessary libraries

from scripts.reports_generation import generate_reports_from_complete_csv
from scripts.reports_generation import generate_reports_from_only_student_csv
from scripts.reports_generation import generate_reports_from_only_supervisor_csv

import os

In [None]:
# Output folders for the generated reports, one per record category. Every
# path is relative to the current working directory. These variables are
# passed to the report generators in the cells below.
_reports_root = "Reports"

cr_output_dir = os.path.join(_reports_root, "complete_reports")            # complete records
osr_output_dir = os.path.join(_reports_root, "only_students_reports")      # student-only records
osur_output_dir = os.path.join(_reports_root, "only_supervisor_reports")   # supervisor-only records

# exist_ok=True keeps this cell safe to re-run when the folders already exist.
for _report_dir in (cr_output_dir, osr_output_dir, osur_output_dir):
    os.makedirs(_report_dir, exist_ok=True)

In [None]:
# Generate reports for complete records, i.e. records that have data from both
# the supervisor and the student.
# The input CSV must already exist (presumably produced by process_data in
# Part 1 — TODO confirm); cr_output_dir is handed to the generator as the
# report destination.
complete_csv_path = "MySurveys/processed_data/processed_completed_data.csv"
generate_reports_from_complete_csv(complete_csv_path, cr_output_dir)

In [None]:
# Generate reports for student-only records, i.e. records that have student
# survey data but no matching supervisor data.
# The input CSV must already exist (presumably produced by process_data in
# Part 1 — TODO confirm); osr_output_dir is handed to the generator as the
# report destination.
only_student_csv_path = "MySurveys/processed_data/processed_only_student_data.csv"
generate_reports_from_only_student_csv(only_student_csv_path, osr_output_dir)

In [None]:
# Generate reports for supervisor-only records, i.e. records that have
# supervisor survey data but no matching student data.
# The input CSV must already exist (presumably produced by process_data in
# Part 1 — TODO confirm); osur_output_dir is handed to the generator as the
# report destination.
only_supervisor_csv_path = "MySurveys/processed_data/processed_only_supervisor_data.csv"
generate_reports_from_only_supervisor_csv(only_supervisor_csv_path, osur_output_dir)