In [1]:
import pandas as pd
import sqlalchemy
import csv
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
import sqlite3 as sql
from sqlalchemy import create_engine, MetaData, inspect
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, ForeignKey

In [2]:
# Create Engine
### BEGIN SOLUTION
engine = create_engine("sqlite:///survivalprediction.sqlite")
### END SOLUTION

In [6]:
# Use `declarative_base` from SQLAlchemy to model table as an ORM class
# Make sure to specify types for each column

# Declare a Base object here
### BEGIN SOLUTION
Base = declarative_base()

### END SOLUTION

In [7]:
conn = engine.connect()
conn.text_factory = str

In [8]:
# Define the ORM class
### BEGIN SOLUTION
class Surv(Base):
    
    __tablename__ = 'prediction'
    
    Primary_Key = Column(Integer, primary_key=True, unique=True)
    Age = Column(String(20))
    Cancer_Stage = Column(String(10))
    Gender = Column(String(10))
    Cancer_Site = Column(String(25))
    Race = Column(String(50))
    Median_Household_Income = Column(String(25))
    Cancer_Type = Column(String(50))

                      
### END SOLUTION

In [9]:
# Use `create_all` to create the tables
### BEGIN SOLUTION
Base.metadata.create_all(engine)
### END SOLUTION

In [10]:
inspector = inspect(engine)

In [11]:
table_names = inspector.get_table_names()
print(table_names)

['prediction']


In [12]:
columns = inspector.get_columns('prediction')
for column in columns:
    print(column["name"], column["type"])

Primary_Key INTEGER
Age VARCHAR(20)
Cancer_Stage VARCHAR(10)
Gender VARCHAR(10)
Cancer_Site VARCHAR(25)
Race VARCHAR(50)
Median_Household_Income VARCHAR(25)
Cancer_Type VARCHAR(50)


In [13]:
file = "C://Users/shawn/CancerPrediction.txt"

In [14]:
#Read csv
Survival = pd.read_csv(file, encoding = 'utf8')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [15]:
#Create dataframe with newly appended data
Survival_DF = pd.DataFrame(Survival)

In [16]:
Survival_DF.head(5)

Unnamed: 0,Sex,Age recode with single ages and 85+,Median household income inflation adj to 2018,Histology recode - broad groupings,"Derived AJCC Stage Group, 7th ed (2010-2015)",Survival months,"Race and origin recode (NHW, NHB, NHAIAN, NHAPI, Hispanic)",CS Schema - AJCC 6th Edition
0,Male,73 years,"$75,000+",8140-8389: adenomas and adenocarcinomas,I,82,Non-Hispanic White,Prostate
1,Male,66 years,"$75,000+",8140-8389: adenomas and adenocarcinomas,IIB,61,Non-Hispanic White,Prostate
2,Female,54 years,"$75,000+",8500-8549: ductal and lobular neoplasms,IIA,24,Non-Hispanic White,Breast
3,Female,82 years,"$75,000+",8500-8549: ductal and lobular neoplasms,IA,63,Non-Hispanic White,Breast
4,Female,56 years,"$75,000+",8500-8549: ductal and lobular neoplasms,IA,45,Non-Hispanic White,Breast


In [17]:
Survival_DF2 = Survival_DF.rename(columns={'Sex': 'Gender', 'Age recode with single ages and 85+':'Age',
       'Median household income inflation adj to 2018':'Median_Household_Income','Histology recode - broad groupings':'Cancer_Type',
       'Derived AJCC Stage Group, 7th ed (2010-2015)':'Cancer_Stage', 'Survival months':'Survival_Months',
       'Race and origin recode (NHW, NHB, NHAIAN, NHAPI, Hispanic)':'Race','CS Schema - AJCC 6th Edition': 'Cancer_Site'})

In [18]:
Survival_DF2['Age'] = Survival_DF2['Age'].str.replace(' years','')
Survival_DF2['Age'] = Survival_DF2['Age'].str.replace('+','')


  Survival_DF2['Age'] = Survival_DF2['Age'].str.replace('+','')


In [19]:
Survival_DF2['Cancer_Type'] = Survival_DF2['Cancer_Type'].str.split(':').str[1]

In [20]:
#Capitalize first letter of each word in 'Cancer Type column'
Survival_DF2['Cancer_Type'] = Survival_DF2['Cancer_Type'].str.title()

In [21]:
Survival_DF3 = Survival_DF2.drop(['Survival_Months'], axis=1)

In [26]:
Survival_DF3.head()

Unnamed: 0,Gender,Age,Median_Household_Income,Cancer_Type,Cancer_Stage,Race,Cancer_Site
0,Male,73,"$75,000+",Adenomas And Adenocarcinomas,I,Non-Hispanic White,Prostate
1,Male,66,"$75,000+",Adenomas And Adenocarcinomas,IIB,Non-Hispanic White,Prostate
2,Female,54,"$75,000+",Ductal And Lobular Neoplasms,IIA,Non-Hispanic White,Breast
3,Female,82,"$75,000+",Ductal And Lobular Neoplasms,IA,Non-Hispanic White,Breast
4,Female,56,"$75,000+",Ductal And Lobular Neoplasms,IA,Non-Hispanic White,Breast


In [27]:
Base.metadata.create_all(engine)

In [28]:
#Convert csv data to dictionary
CancerSurvival = Final_Survival.to_dict(orient='records')
#View first row of dictionary
CancerSurvival[0]

{'Gender': 'Male',
 'Age': '73',
 'Median_Household_Income': '$75,000+',
 'Cancer_Type': ' Adenomas And Adenocarcinomas',
 'Cancer_Stage': 'I',
 'Race': 'Non-Hispanic White',
 'Cancer_Site': 'Prostate'}

In [29]:
metadata = MetaData(bind=engine)
metadata.reflect()

In [30]:
#Create sqlalchemy table
SurvivalTable = sqlalchemy.Table('prediction', metadata, autoload=True)

In [31]:
#Insert dictionary data into sqlalchemy table
conn.execute(SurvivalTable.insert(), CancerSurvival)

<sqlalchemy.engine.result.ResultProxy at 0x235af716040>

In [32]:
#View sqlalchemy table data
conn.execute("select * from prediction").fetchall()

[(1, '73', 'I', 'Male', 'Prostate', 'Non-Hispanic White', '$75,000+', ' Adenomas And Adenocarcinomas'),
 (2, '66', 'IIB', 'Male', 'Prostate', 'Non-Hispanic White', '$75,000+', ' Adenomas And Adenocarcinomas'),
 (3, '54', 'IIA', 'Female', 'Breast', 'Non-Hispanic White', '$75,000+', ' Ductal And Lobular Neoplasms'),
 (4, '82', 'IA', 'Female', 'Breast', 'Non-Hispanic White', '$75,000+', ' Ductal And Lobular Neoplasms'),
 (5, '56', 'IA', 'Female', 'Breast', 'Non-Hispanic White', '$75,000+', ' Ductal And Lobular Neoplasms'),
 (6, '85', 'I', 'Male', 'Bladder', 'Non-Hispanic White', '$75,000+', ' Transitional Cell Papillomas And Carcinomas'),
 (7, '67', 'IIIB', 'Female', 'Rectum', 'Non-Hispanic Asian or Pacific Islander', '$75,000+', ' Adenomas And Adenocarcinomas'),
 (8, '85', 'IIIB', 'Female', 'Breast', 'Non-Hispanic Black', '$75,000+', ' Epithelial Neoplasms, Nos'),
 (9, '71', 'IA', 'Female', 'Breast', 'Hispanic (All Races)', '$75,000+', ' Ductal And Lobular Neoplasms'),
 (10, '76', 'IV', 

# Flask Test

In [33]:
engine = create_engine("sqlite:///survivalprediction.sqlite")

# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)

# Save reference to the table
CS = Base.classes.prediction

In [34]:
session = Session(engine)

Cancer Site

In [35]:
# Query for unique cancer site names
cancer_site_results = session.query(CS.Cancer_Site).distinct()

In [36]:
    session.close()

    cancer_site_list = []
    for Cancer_Site in cancer_site_results:
        cancer_site_dict = {}
        cancer_site_dict["Cancer_Site"] = Cancer_Site
        cancer_site_list.append(cancer_site_dict)
        


In [37]:
print(cancer_site_list)

[{'Cancer_Site': ('Prostate',)}, {'Cancer_Site': ('Breast',)}, {'Cancer_Site': ('Bladder',)}, {'Cancer_Site': ('Rectum',)}, {'Cancer_Site': ('Lung',)}, {'Cancer_Site': ('Colon',)}, {'Cancer_Site': ('Melanoma',)}, {'Cancer_Site': ('SmallIntestine',)}, {'Cancer_Site': ('Corpus',)}, {'Cancer_Site': ('Stomach',)}, {'Cancer_Site': ('Kidney',)}, {'Cancer_Site': ('Thyroid',)}, {'Cancer_Site': ('Ovary',)}, {'Cancer_Site': ('Lymphoma',)}, {'Cancer_Site': ('SkinEyelid',)}, {'Cancer_Site': ('MF',)}, {'Cancer_Site': ('SoftTissue',)}, {'Cancer_Site': ('Vulva',)}, {'Cancer_Site': ('Urethra',)}, {'Cancer_Site': ('PancreasHead',)}, {'Cancer_Site': ('PancreasBodyTail',)}, {'Cancer_Site': ('Pleura',)}, {'Cancer_Site': ('BaseTongue',)}, {'Cancer_Site': ('Skin',)}, {'Cancer_Site': ('SupraLarynx',)}, {'Cancer_Site': ('OthPancreas',)}, {'Cancer_Site': ('Liver',)}, {'Cancer_Site': ('Anus',)}, {'Cancer_Site': ('GumUpper',)}, {'Cancer_Site': ('GumLower',)}, {'Cancer_Site': ('Oropharynx',)}, {'Cancer_Site': ('E

Median Household Income

In [38]:
Median_Household_Income_Results = session.query(CS.Median_Household_Income).distinct()

In [39]:
    session.close()

    median_household_income_list = []
    for Median_Household_Income in Median_Household_Income_Results:
        median_household_income_dict = {}
        median_household_income_dict["Median_Household_Income"] = Median_Household_Income
        median_household_income_list.append(median_household_income_dict)
        
print(median_household_income_list)

[{'Median_Household_Income': ('$75,000+',)}, {'Median_Household_Income': ('$65,000 - $69,999',)}, {'Median_Household_Income': ('$70,000 - $74,999',)}, {'Median_Household_Income': ('$60,000 - $64,999',)}, {'Median_Household_Income': ('$55,000 - $59,999',)}, {'Median_Household_Income': ('$40,000 - $44,999',)}, {'Median_Household_Income': ('$45,000 - $49,999',)}, {'Median_Household_Income': ('$50,000 - $54,999',)}, {'Median_Household_Income': ('$35,000 - $39,999',)}, {'Median_Household_Income': ('< $35,000',)}, {'Median_Household_Income': ('Unknown/missing/no match/Not 1990-2017',)}]


Race

In [40]:
Race_Results = session.query(CS.Race).distinct()

In [41]:
    session.close()

    race_list = []
    for Race in Race_Results:
        race_dict = {}
        race_dict["Race"] = Race
        race_list.append(race_dict)
        
print(race_list)

[{'Race': ('Non-Hispanic White',)}, {'Race': ('Non-Hispanic Asian or Pacific Islander',)}, {'Race': ('Non-Hispanic Black',)}, {'Race': ('Hispanic (All Races)',)}, {'Race': ('Non-Hispanic American Indian/Alaska Native',)}, {'Race': ('Non-Hispanic Unknown Race',)}]


Cancer Stage

In [42]:
Cancer_Stage_Results = session.query(CS.Cancer_Stage).distinct()

In [43]:
    session.close()

    cancer_stage_list = []
    for Cancer_Stage in Cancer_Stage_Results:
        cancer_stage_dict = {}
        cancer_stage_dict["Cancer_Stage"] = Cancer_Stage
        cancer_stage_list.append(cancer_stage_dict)
        
print(cancer_stage_list)

[{'Cancer_Stage': ('I',)}, {'Cancer_Stage': ('IIB',)}, {'Cancer_Stage': ('IIA',)}, {'Cancer_Stage': ('IA',)}, {'Cancer_Stage': ('IIIB',)}, {'Cancer_Stage': ('IV',)}, {'Cancer_Stage': ('IIIC',)}, {'Cancer_Stage': ('IIIA',)}, {'Cancer_Stage': ('IIIC1',)}, {'Cancer_Stage': ('III',)}, {'Cancer_Stage': ('IB',)}, {'Cancer_Stage': ('IEA',)}, {'Cancer_Stage': ('OCCULT',)}, {'Cancer_Stage': ('0a',)}, {'Cancer_Stage': ('IVB',)}, {'Cancer_Stage': ('IVA',)}, {'Cancer_Stage': ('II',)}, {'Cancer_Stage': ('INOS',)}, {'Cancer_Stage': ('IIINOS',)}, {'Cancer_Stage': ('IIIEA',)}, {'Cancer_Stage': ('IIEA',)}, {'Cancer_Stage': ('IVC',)}, {'Cancer_Stage': ('IIC',)}, {'Cancer_Stage': ('IE',)}, {'Cancer_Stage': ('IIEB',)}, {'Cancer_Stage': ('0is',)}, {'Cancer_Stage': ('IINOS',)}, {'Cancer_Stage': ('IB2',)}, {'Cancer_Stage': ('IIIESB',)}, {'Cancer_Stage': ('0',)}, {'Cancer_Stage': ('IVNOS',)}, {'Cancer_Stage': ('IIIC2',)}, {'Cancer_Stage': ('ISA',)}, {'Cancer_Stage': ('IIE',)}, {'Cancer_Stage': ('IBNOS',)}, {'

Cancer Type

In [44]:
Cancer_Type_Results = session.query(CS.Cancer_Type).distinct()

In [45]:
    session.close()

    cancer_type_list = []
    for Cancer_Type in Cancer_Type_Results:
        cancer_type_dict = {}
        cancer_type_dict["Cancer_Type"] = Cancer_Type
        cancer_type_list.append(cancer_type_dict)
        
print(cancer_type_list)

[{'Cancer_Type': (' Adenomas And Adenocarcinomas',)}, {'Cancer_Type': (' Ductal And Lobular Neoplasms',)}, {'Cancer_Type': (' Transitional Cell Papillomas And Carcinomas',)}, {'Cancer_Type': (' Epithelial Neoplasms, Nos',)}, {'Cancer_Type': (' Nevi And Melanomas',)}, {'Cancer_Type': (' Acinar Cell Neoplasms',)}, {'Cancer_Type': (' Complex Mixed And Stromal Neoplasms',)}, {'Cancer_Type': (' Squamous Cell Neoplasms',)}, {'Cancer_Type': (' Cystic, Mucinous And Serous Neoplasms',)}, {'Cancer_Type': (' Nhl - Mature B-Cell Lymphomas',)}, {'Cancer_Type': (' Adnexal And Skin Appendage Neoplasms',)}, {'Cancer_Type': (' Nhl - Mature T And Nk-Cell Lymphomas',)}, {'Cancer_Type': (' Blood Vessel Tumors',)}, {'Cancer_Type': (' Soft Tissue Tumors And Sarcomas, Nos',)}, {'Cancer_Type': (' Complex Epithelial Neoplasms',)}, {'Cancer_Type': (' Mesothelial Neoplasms',)}, {'Cancer_Type': (' Unspecified Neoplasms',)}, {'Cancer_Type': (' Hodgkin Lymphomas',)}, {'Cancer_Type': (' Mucoepidermoid Neoplasms',)},

Age

In [47]:
Age_Results = session.query(CS.Age).distinct()

In [49]:
    session.close()

    age_list = []
    for Age in Age_Results:
        age_dict = {}
        age_dict["Age"] = Age
        age_list.append(age_dict)
        
print(age_list)

[{'Age': ('73',)}, {'Age': ('66',)}, {'Age': ('54',)}, {'Age': ('82',)}, {'Age': ('56',)}, {'Age': ('85',)}, {'Age': ('67',)}, {'Age': ('71',)}, {'Age': ('76',)}, {'Age': ('60',)}, {'Age': ('75',)}, {'Age': ('69',)}, {'Age': ('80',)}, {'Age': ('70',)}, {'Age': ('59',)}, {'Age': ('84',)}, {'Age': ('61',)}, {'Age': ('52',)}, {'Age': ('64',)}, {'Age': ('68',)}, {'Age': ('57',)}, {'Age': ('53',)}, {'Age': ('49',)}, {'Age': ('45',)}, {'Age': ('46',)}, {'Age': ('72',)}, {'Age': ('83',)}, {'Age': ('74',)}, {'Age': ('58',)}, {'Age': ('62',)}, {'Age': ('81',)}, {'Age': ('79',)}, {'Age': ('78',)}, {'Age': ('55',)}, {'Age': ('77',)}, {'Age': ('63',)}, {'Age': ('44',)}, {'Age': ('35',)}, {'Age': ('23',)}, {'Age': ('34',)}, {'Age': ('65',)}, {'Age': ('41',)}, {'Age': ('50',)}, {'Age': ('51',)}, {'Age': ('42',)}, {'Age': ('40',)}, {'Age': ('48',)}, {'Age': ('38',)}, {'Age': ('47',)}, {'Age': ('39',)}, {'Age': ('43',)}, {'Age': ('25',)}, {'Age': ('37',)}, {'Age': ('31',)}, {'Age': ('20',)}, {'Age': (

Gender

In [50]:
Gender_Results = session.query(CS.Gender).distinct()

In [51]:
    session.close()

    gender_list = []
    for Gender in Gender_Results:
        gender_dict = {}
        gender_dict["Gender"] = Gender
        gender_list.append(gender_dict)
        
print(gender_list)

[{'Gender': ('Male',)}, {'Gender': ('Female',)}]
