# Fortune 500 Companies Database

### This notebook Transforms the following data into fortune500_db.sqlite
- Fortune 500 companies
- Fortune 500 stock values

Future additions include: 
- Fortune 500 job postings
- Global Exchange Rates

## Import necessary libraries:

In [1]:
import pandas as pd

In [2]:
# Imports the method used for connecting to DBs
from sqlalchemy import create_engine

# Imports the methods needed to abstract classes into tables
from sqlalchemy.ext.declarative import declarative_base

# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Float

# PyMySQL 
import pymysql
pymysql.install_as_MySQLdb()

## Create engine and connection to sqlite:

In [5]:
engine = create_engine("sqlite:///fortune500_db.sqlite")
conn = engine.connect()

## Create declarative base and create session to add to database:

In [6]:
Base = declarative_base()

Base.metadata.create_all(engine)

from sqlalchemy.orm import Session
session = Session(bind=engine)

## Import all csv files into a dataframe:

#### Start with fortune 500 comapnies and proceed with others.

In [7]:
fortune500_path = "./Data/Fortune_500_list_databahn_2018.csv"
company2tick_path = "./Data/company_to_tick.csv"
stock_values_path = "stock_values.csv"

In [33]:
fortune500_df = pd.read_csv(fortune500_path, encoding = "ISO-8859-1")
fortune500_df.columns

Index(['Fortune  Rank (2018)', 'Company Name', 'Ticker Symbol',
       'Fortune Rank (2017)', 'Revenues', 'Revenues change', 'Profits',
       'Profit Change', 'Assets', 'Market Value', 'Employee Count', 'CEO Name',
       'CEO Title', 'Sector', 'Industry', 'HQ Location', 'Website URL',
       'Years on Fortune List', 'Fortune URL'],
      dtype='object')

## Edit column headers to be friendlier to SQL.

In [36]:
fortune500_df.rename(columns={
    "Fortune  Rank (2018)":"fortune_rank",
    "Company Name":"name",
    "Ticker Symbol":"ticker",
    "Fortune Rank (2017)":"rank_2017",
    "Revenues":"revenues",
    "Revenues change":"revenue_change",
    "Profits":"profits",
    "Profit Change":"profit_change",
    "Assets":"assets",
    "Market Value":"market_value",
    "Employee Count":"num_employees",
    "CEO Name":"ceo_name",
    "CEO Title":"ceo_title",
    "Sector":"sector",
    "HQ Location":"hq_location",
    "Website URL":"url",
    "Years on Fortune List":"years_on_fortune_list",
    "Fortune URL":"fortune_url"
}, inplace=True)

## Push DataFrame to sqlite and check to see if written.

In [37]:
fortune500_df.set_index("fortune_rank")
fortune500_df.to_sql("fortune500_companies", conn, if_exists="replace")

In [38]:
f500 = pd.read_sql_query("select * from fortune500_companies", conn)

In [39]:
f500.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 505 entries, 0 to 504
Data columns (total 20 columns):
index                    505 non-null int64
fortune_rank             500 non-null float64
name                     502 non-null object
ticker                   471 non-null object
rank_2017                495 non-null float64
revenues                 500 non-null object
revenue_change           497 non-null object
profits                  500 non-null object
profit_change            436 non-null object
assets                   500 non-null object
market_value             472 non-null object
num_employees            500 non-null object
ceo_name                 500 non-null object
ceo_title                500 non-null object
sector                   500 non-null object
Industry                 500 non-null object
hq_location              500 non-null object
url                      500 non-null object
years_on_fortune_list    500 non-null float64
fortune_url              500 non-null 