<a href="https://colab.research.google.com/github/maya-papaya/ads1-cervical-cancer-analysis/blob/main/Creating%20Relational%20Database%20(3).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Creating Relational Database

In [1]:
# SETTING UP COLAB AND MODULES
# Imports are grouped up front: standard library, third-party, then Colab.
import os
import warnings

import numpy as np
import pandas as pd
from google.colab import drive

# Attach Google Drive to the Colab runtime at /content/drive.
drive.mount('/content/drive')

# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')

# Make the prepped-datasets folder the working directory so later cells can
# refer to the CSV files (and the SQLite file) by bare file name.
os.chdir("/content/drive/My Drive/ADS_Maya_Reddy/projects/disease_project_1/prepped_datasets/")

Mounted at /content/drive


In [9]:
# LOADING DATASETS
def _read_prepped(file_name):
    """Read one prepared CSV from the working directory into a DataFrame.

    Columns whose header starts with ``Unnamed:`` are dropped. pandas assigns
    that header to a column with a blank name, which is what a saved
    DataFrame index looks like when the file is read back. Left in place, the
    column is carried into the database as an extra ``Unnamed: 0`` field.

    Parameters
    ----------
    file_name : str
        Name of the CSV file, relative to the current working directory.

    Returns
    -------
    pandas.DataFrame
        The file contents without any ``Unnamed:`` columns.
    """
    frame = pd.read_csv(file_name)
    keep = [col for col in frame.columns if not str(col).startswith('Unnamed:')]
    return frame[keep]


data19 = _read_prepped('data19.csv')
data16 = _read_prepped('data16.csv')
data10 = _read_prepped('data10.csv')
pap = _read_prepped('pap.csv')
hpv = _read_prepped('hpv.csv')
screening_program = _read_prepped('screening_program.csv')
medicaid_chip = _read_prepped('medicaid_chip.csv')
adolescent = _read_prepped('adolescent.csv')

In [15]:
# CREATING DATABASE TABLES
import sqlite3

# Open the SQLite database file in the current working directory
# (SQLite creates the file if it is not there yet).
connection = sqlite3.connect(database='cervical_cancer.db')

# Cursor used by the following statements to run the CREATE TABLE queries.
cursor = connection.cursor()

# Create the 'data19' table
# Declares the intended column layout; the statement is a no-op when a table
# named data19 already exists.
cursor.execute(
    '''
CREATE TABLE IF NOT EXISTS data19 (
    country PRIMARY KEY,
    year INTEGER NOT NULL,
    age_group_id INTEGER NOT NULL,
    age_group TEXT NOT NULL,
    haq_index_age_type TEXT NOT NULL,
    measure TEXT NOT NULL,
    val FLOAT NOT NULL,
    upper FLOAT NOT NULL,
    lower FLOAT NOT NULL
)'''
)
# Sourced from https://theleftjoin.com/how-to-write-a-pandas-dataframe-to-an-sqlite-table/
# NOTE(review): if_exists='replace' makes pandas drop the table and rebuild it
# from the DataFrame, so the constraints declared above are not kept in the
# stored table. Also, country is the sole PRIMARY KEY next to year/age_group
# columns — verify it is actually unique before enforcing the schema.
data19.to_sql(name='data19', con=connection, index=False, if_exists='replace')

# Create the 'data16' table
# Declares the intended column layout; skipped when data16 already exists.
cursor.execute(
    '''
CREATE TABLE IF NOT EXISTS data16 (
    location PRIMARY KEY,
    year INTEGER NOT NULL,
    sex TEXT NOT NULL,
    age_group TEXT NOT NULL,
    metric TEXT NOT NULL,
    measure TEXT NOT NULL,
    val FLOAT NOT NULL,
    upper FLOAT NOT NULL,
    lower FLOAT NOT NULL
)'''
)
# NOTE(review): if_exists='replace' makes pandas drop the table and rebuild it
# from the DataFrame, so the constraints declared above are not kept in the
# stored table — confirm whether the declared schema is meant to be enforced.
data16.to_sql(name='data16', con=connection, index=False, if_exists='replace')

# Create the 'data10' table
# Declares the intended column layout; skipped when data10 already exists.
cursor.execute(
    '''
CREATE TABLE IF NOT EXISTS data10 (
    country PRIMARY KEY,
    year INTEGER NOT NULL,
    region TEXT NOT NULL,
    population FLOAT NOT NULL,
    cancer_death_count_to_49 INTEGER NOT NULL,
    cancer_case_count_to_49 INTEGER NOT NULL,
    percent_deaths_to_49 FLOAT NOT NULL,
    cancer_death_count_to_79 INTEGER NOT NULL,
    cancer_case_count_to_79 INTEGER NOT NULL,
    mortality_risk FLOAT NOT NULL,
    incidence_risk FLOAT NOT NULL
)
'''
)
# NOTE(review): if_exists='replace' makes pandas drop the table and rebuild it
# from the DataFrame, so the constraints declared above are not kept in the
# stored table — confirm whether the declared schema is meant to be enforced.
data10.to_sql(name='data10', con=connection, index=False, if_exists='replace')

# Create the 'pap' table
# Declares the intended column layout; skipped when pap already exists.
# The pop_2010 line previously read "pop_2010, INTEGER NOT NULL": the stray
# comma split it into an untyped pop_2010 column plus a second column
# literally named INTEGER. The comma is removed so pop_2010 is one INTEGER
# NOT NULL column.
cursor.execute('''
CREATE TABLE IF NOT EXISTS pap (
    state PRIMARY KEY,
    city TEXT NOT NULL,
    region TEXT NOT NULL,
    pop_2010 INTEGER NOT NULL,
    val_2019 FLOAT NOT NULL,
    lower_2019 FLOAT NOT NULL,
    upper_2019 FLOAT NOT NULL,
    val_2016 FLOAT NOT NULL,
    lower_2016 FLOAT NOT NULL,
    upper_2016 FLOAT NOT NULL,
    val_2017 FLOAT NOT NULL,
    lower_2017 FLOAT NOT NULL,
    upper_2017 FLOAT NOT NULL,
    val_2018 FLOAT NOT NULL,
    lower_2018 FLOAT NOT NULL,
    upper_2018 FLOAT NOT NULL
)
''')
# NOTE(review): if_exists='replace' makes pandas drop the table and rebuild it
# from the DataFrame, so the constraints declared above are not kept in the
# stored table. state is the sole PRIMARY KEY next to a city column — verify
# it is actually unique before enforcing the schema.
pap.to_sql('pap', connection, if_exists='replace', index=False)

# Create the 'hpv' table
# Declares the intended column layout; skipped when hpv already exists.
cursor.execute(
    '''
CREATE TABLE IF NOT EXISTS hpv (
    country PRIMARY KEY,
    country_name TEXT NOT NULL,
    year INTEGER NOT NULL,
    region TEXT NOT NULL,
    income_group TEXT NOT NULL,
    cohort_size FLOAT NOT NULL,
    current_cov FLOAT NOT NULL,
    curr_vacc_cohort_size FLOAT NOT NULL,
    future_cov FLOAT NOT NULL,
    future_vacc_cohort_size FLOAT NOT NULL,
    curr_cc_prev FLOAT NOT NULL,
    curr_mort_prev FLOAT NOT NULL,
    curr_cost FLOAT NOT NULL,
    curr_cost_prev FLOAT NOT NULL,
    proj_cc_prev FLOAT NOT NULL,
    proj_mort_prev FLOAT NOT NULL,
    proj_cost FLOAT NOT NULL,
    current_net_cost FLOAT NOT NULL
)
'''
)
# NOTE(review): if_exists='replace' makes pandas drop the table and rebuild it
# from the DataFrame, so the constraints declared above are not kept in the
# stored table — confirm whether the declared schema is meant to be enforced.
hpv.to_sql(name='hpv', con=connection, index=False, if_exists='replace')

# Create the 'screening_programs' table
# Schema corrections relative to the earlier declaration:
#   * country is now declared. The FOREIGN KEY clause names it, and SQLite
#     rejects a foreign key on a column the table does not define, so the
#     statement failed whenever the table did not already exist.
#   * code is TEXT: the values are three-letter country codes such as 'AFG'.
#   * the key is (country, year): a year on its own repeats across countries.
cursor.execute('''
CREATE TABLE IF NOT EXISTS screening_programs (
    country TEXT NOT NULL,
    code TEXT NOT NULL,
    year INTEGER NOT NULL,
    screening_program TEXT NOT NULL,
    PRIMARY KEY (country, year),
    FOREIGN KEY (country) REFERENCES data10 (country)
)
''')
# NOTE(review): if_exists='replace' makes pandas drop the table and rebuild it
# from the DataFrame, so the keys declared above are not kept in the stored
# table — confirm whether the declared schema is meant to be enforced.
screening_program.to_sql('screening_programs', connection, if_exists='replace', index=False)

# Create the 'medicaid_chip' table
# Declares the intended column layout; skipped when medicaid_chip already exists.
cursor.execute(
    '''
CREATE TABLE IF NOT EXISTS medicaid_chip (
    state TEXT NOT NULL,
    month TEXT NOT NULL,
    year PRIMARY KEY,
    service_count FLOAT NOT NULL,
    rate_per_1000 FLOAT NOT NULL
)
'''
)
# NOTE(review): if_exists='replace' makes pandas drop the table and rebuild it
# from the DataFrame, so the constraints declared above are not kept in the
# stored table. year is the sole PRIMARY KEY next to state and month columns —
# verify it is actually unique before enforcing the schema.
medicaid_chip.to_sql(name='medicaid_chip', con=connection, index=False, if_exists='replace')

# Create the 'adolescent' table
# Declares the intended column layout; skipped when adolescent already exists.
cursor.execute(
    '''
CREATE TABLE IF NOT EXISTS adolescent (
    location TEXT NOT NULL,
    location_type TEXT NOT NULL,
    year PRIMARY KEY,
    dose TEXT NOT NULL,
    dimension_type TEXT NOT NULL,
    dimension_val TEXT NOT NULL,
    val FLOAT NOT NULL,
    sample_size FLOAT NOT NULL,
    lower FLOAT NOT NULL,
    upper FLOAT NOT NULL
)
'''
)
# NOTE(review): if_exists='replace' makes pandas drop the table and rebuild it
# from the DataFrame, so the constraints declared above are not kept in the
# stored table. year is the sole PRIMARY KEY next to location and dose
# columns — verify it is actually unique before enforcing the schema.
adolescent.to_sql(name='adolescent', con=connection, index=False, if_exists='replace')

# Commit the changes to the database
# Ends any transaction still open on `connection` so the work done through it
# is saved to the cervical_cancer.db file. The connection is left open.
connection.commit()

In [12]:
# CHECKING DATABASE TABLES
# Read every row of 'screening_programs' back through the open connection;
# as the cell's last expression, the resulting DataFrame is displayed.
pd.read_sql(sql='SELECT * FROM screening_programs', con=connection)

Unnamed: 0,country,code,year,screening_program
0,Afghanistan,AFG,2015,No
1,Afghanistan,AFG,2017,No
2,Afghanistan,AFG,2019,No
3,Afghanistan,AFG,2021,No
4,Albania,ALB,2015,No
...,...,...,...,...
771,Zambia,ZMB,2021,Yes
772,Zimbabwe,ZWE,2015,Yes
773,Zimbabwe,ZWE,2017,Yes
774,Zimbabwe,ZWE,2019,No
