# Fortune 500 and Exchange Transformations

This is the procedure to transform and load the fortune 500 and exchange rate datasets. 
Datasets to be included in this procedure will be: 
- List of Fortune 500 companies
- Indeed postings per fortune 500 company
- Stock prices for fortune 500 company
- Exchange Rates for each country. 

### Import necessary libraries

In [1]:
import pandas as pd

### Create DataFrames for each dataset.

In [2]:
fortune500_path = "./Data/Fortune500global_old.csv" # https://www.someka.net/excel-template/fortune-500-excel-list/

In [3]:
fortune500_df = pd.read_csv(fortune500_path, encoding = "ISO-8859-1")
fortune500_df.head()

Unnamed: 0,Rank,Company Name,Country,Number of Employees,Previous Rank,Revenues ($millions),Revenue Change,Profits ($millions),Profit Change,Assets ($millions)
0,1,Walmart,USA,2300000,1,500343,0.03,9862.0,-0.277,204522.0
1,2,State Grid,China,913546,2,348903,0.107,9533.4,-0.004,585278.0
2,3,Sinopec Group,China,667793,3,326953,0.222,1537.8,0.222,346545.0
3,4,China National Petroleum,China,1470193,4,326008,0.242,-690.5,-1.37,629411.0
4,5,Royal Dutch Shell,Netherlands,84000,7,311870,0.299,12977.0,1.837,407097.0


In [4]:
column_rename = {
    "Rank":"rank",
    "Company Name":"name",
    "Country":"country",
    "Number of Employees":"num_employees",
    "Previous \nRank":"previous_rank",
    "Revenues\n($millions)":"revenue_mil",
    "Revenue \nChange":"revenue_change",
    "Profits\n($millions)":"profits_mil",
    "Profit \nChange":"profit_change",
    "Assets\n($millions)":"assets_mil"
}

fortune500_df.rename(columns=column_rename, inplace=True)
fortune500_df.set_index("name", inplace=True)
fortune500_df.head(10)

Unnamed: 0_level_0,rank,country,num_employees,previous_rank,revenue_mil,revenue_change,profits_mil,profit_change,assets_mil
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Walmart,1,USA,2300000,1,500343,0.03,9862.0,-0.277,204522.0
State Grid,2,China,913546,2,348903,0.107,9533.4,-0.004,585278.0
Sinopec Group,3,China,667793,3,326953,0.222,1537.8,0.222,346545.0
China National Petroleum,4,China,1470193,4,326008,0.242,-690.5,-1.37,629411.0
Royal Dutch Shell,5,Netherlands,84000,7,311870,0.299,12977.0,1.837,407097.0
Toyota Motor,6,Japan,369124,5,265172,0.041,22510.1,0.332,473133.0
Volkswagen,7,Germany,642292,6,260028,0.082,13107.3,1.208,506956.0
BP,8,Britain,74000,12,244582,0.311,3389.0,28.47,276515.0
Exxon Mobil,9,USA,71200,10,244363,0.174,19710.0,1.514,348691.0
Berkshire Hathaway,10,USA,377000,8,242137,0.083,44940.0,0.867,702095.0


### Load dataset into MySQL with sqlite

In [5]:
from config import username, passwd, location, database
import pymysql
pymysql.install_as_MySQLdb()

In [32]:
# Imports the method used for connecting to DBs
from sqlalchemy import create_engine

# Imports the methods needed to abstract classes into tables
from sqlalchemy.ext.declarative import declarative_base

# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Float

# PyMySQL 
import pymysql
pymysql.install_as_MySQLdb()

In [33]:
Base = declarative_base()

class Fortune500(Base):
    __tablename__ = "fortune500_companies"
    name = Column(String(255), primary_key=True)
    rank = Column(String(255))
    country = Column(String(255))
    num_employees = Column(Float)
    previous_rank = Column(String(255))
    revenue_mil = Column(Integer)
    revenue_change = Column(String(255))
    profits_mil = Column(String(255))
    profit_change = Column(String(255))
    assets_mil = Column(String(255))

In [34]:
engine = create_engine("sqlite:///fortune500_db.sqlite")
conn = engine.connect()

Base.metadata.create_all(engine)

from sqlalchemy.orm import Session
session = Session(bind=engine)

In [35]:
index = fortune500_df.index[1]
index

'State Grid'

In [37]:
# Create a forloop to iterate through all companies in dataset
for i in range(2,3):
    # Set the row to insert class variable
    index = fortune500_df.index[i]
    
    # Define class variables
    company = Fortune500(
        name = index,
        rank = fortune500_df.loc[index][0],
        country = fortune500_df.loc[index][1],
        num_employees = fortune500_df.loc[index][2],
        previous_rank = fortune500_df.loc[index][3],
        revenue_mil = fortune500_df.loc[index][4],
        revenue_change = fortune500_df.loc[index][5],
        profits_mil = fortune500_df.loc[index][6],
        profit_change = fortune500_df.loc[index][7],
        assets_mil = fortune500_df.loc[index][8])
    
    session.add(company)
    session.commit()
    

InvalidRequestError: This Session's transaction has been rolled back due to a previous exception during flush. To begin a new transaction with this Session, first issue Session.rollback(). Original exception was: (sqlite3.IntegrityError) UNIQUE constraint failed: fortune500_companies.name [SQL: 'INSERT INTO fortune500_companies (name, rank, country, num_employees, previous_rank, revenue_mil, revenue_change, profits_mil, profit_change, assets_mil) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'] [parameters: ('State Grid', 2, 'China', 913546.0, '2', 348903, '0.107', '9533.4', '-0.004', 585278.0)] (Background on this error at: http://sqlalche.me/e/gkpj)

In [30]:
comapny_list = session.query(Fortune500)
for company in comapny_list:
    print(company.num_employees)

b'`\x18#\x00\x00\x00\x00\x00'
b'\x8a\xf0\r\x00\x00\x00\x00\x00'
