## Loading CSV Files using Python

In [1]:
import csv

In [8]:
path = '/Users/joshua/Development/business-analysis/Data/sales_data_sample.csv'

In [9]:
csv_file = open(path)

In [10]:
file_reader = csv.DictReader(csv_file)

In [13]:
file_list = []

In [15]:
for row in file_reader:
    file_list.append(row)

In [22]:
file_list[:1]

[OrderedDict([('\ufeffORDERNUMBER', '10134'),
              ('QUANTITYORDERED', '41'),
              ('PRICEEACH', '94.74'),
              ('ORDERLINENUMBER', '2'),
              ('SALES', '3884.34'),
              ('ORDERDATE', '7/1/03 0:00'),
              ('STATUS', 'Shipped'),
              ('QTR_ID', '3'),
              ('MONTH_ID', '7'),
              ('YEAR_ID', '2003'),
              ('PRODUCTLINE', 'Motorcycles'),
              ('MSRP', '95'),
              ('PRODUCTCODE', 'S10_1678'),
              ('CUSTOMERNAME', 'Lyon Souveniers'),
              ('PHONE', '+33 1 46 62 7555'),
              ('ADDRESSLINE1', '27 rue du Colonel Pierre Avia'),
              ('ADDRESSLINE2', ''),
              ('CITY', 'Paris'),
              ('STATE', ''),
              ('POSTALCODE', '75508'),
              ('COUNTRY', 'France'),
              ('TERRITORY', 'EMEA'),
              ('CONTACTLASTNAME', 'Da Cunha'),
              ('CONTACTFIRSTNAME', 'Daniel'),
              ('DEALSIZE', 'Medium'

In [23]:
customers = [record['CUSTOMERNAME'] for record in file_list]

In [24]:
customers[:6]

['Lyon Souveniers',
 'Toys4GrownUps.com',
 'Corporate Gift Ideas Co.',
 'Technics Stores Inc.',
 'Daedalus Designs Imports',
 'Herkku Gifts']

## Loading CSV and Excel Files using Pandas

In [1]:
import pandas as pd
import numpy as np

In [25]:
!pwd

/Users/joshua/Development/business-analysis/Tutorials


In [26]:
%cd ../Data

/Users/joshua/Development/business-analysis/Data


In [7]:
!ls

customer_data.xlsx     sales_data_sample.xlsx
sales_data_sample.csv  sales_data_sample_no_customer.xlsx


In [8]:
df_2 = pd.read_csv('sales_data_sample.csv')

In [9]:
xlsx = pd.ExcelFile('sales_data_sample.xlsx')
xlsx

<pandas.io.excel.ExcelFile at 0x117495f60>

In [11]:
sheets = xlsx.sheet_names
sheets

['sales_data_sample']

In [12]:
df = pd.read_excel(xlsx, sheets[0])
df.head()

Unnamed: 0,ORDERNUMBER,QUANTITYORDERED,PRICEEACH,ORDERLINENUMBER,SALES,ORDERDATE,STATUS,QTR_ID,MONTH_ID,YEAR_ID,...,ADDRESSLINE1,ADDRESSLINE2,CITY,STATE,POSTALCODE,COUNTRY,TERRITORY,CONTACTLASTNAME,CONTACTFIRSTNAME,DEALSIZE
0,10107,30,95.7,2,2871.0,2003-02-24,Shipped,1,2,2003,...,897 Long Airport Avenue,,NYC,NY,10022.0,USA,,Yu,Kwai,Small
1,10121,34,81.35,5,2765.9,2003-05-07,Shipped,2,5,2003,...,59 rue de l'Abbaye,,Reims,,51100.0,France,EMEA,Henriot,Paul,Small
2,10134,41,94.74,2,3884.34,2003-07-01,Shipped,3,7,2003,...,27 rue du Colonel Pierre Avia,,Paris,,75508.0,France,EMEA,Da Cunha,Daniel,Medium
3,10145,45,83.26,6,3746.7,2003-08-25,Shipped,3,8,2003,...,78934 Hillside Dr.,,Pasadena,CA,90003.0,USA,,Young,Julie,Medium
4,10159,49,100.0,14,5205.27,2003-10-10,Shipped,4,10,2003,...,7734 Strong St.,,San Francisco,CA,,USA,,Brown,Julie,Medium


In [13]:
df.columns

Index(['ORDERNUMBER', 'QUANTITYORDERED', 'PRICEEACH', 'ORDERLINENUMBER',
       'SALES', 'ORDERDATE', 'STATUS', 'QTR_ID', 'MONTH_ID', 'YEAR_ID',
       'PRODUCTLINE', 'MSRP', 'PRODUCTCODE', 'CUSTOMERNAME', 'PHONE',
       'ADDRESSLINE1', 'ADDRESSLINE2', 'CITY', 'STATE', 'POSTALCODE',
       'COUNTRY', 'TERRITORY', 'CONTACTLASTNAME', 'CONTACTFIRSTNAME',
       'DEALSIZE'],
      dtype='object')

In [14]:
df.shape

(2823, 25)

In [15]:
df['CUSTOMERNAME'].nunique()

92

In [16]:
df['PRODUCTCODE'].nunique()

109

In [17]:
df['ORDERNUMBER'].nunique()

307

In [18]:
data = df[['CUSTOMERNAME','ORDERNUMBER','PRODUCTCODE','QUANTITYORDERED','PRICEEACH']].copy()
data.head()

Unnamed: 0,CUSTOMERNAME,ORDERNUMBER,PRODUCTCODE,QUANTITYORDERED,PRICEEACH
0,Land of Toys Inc.,10107,S10_1678,30,95.7
1,Reims Collectables,10121,S10_1678,34,81.35
2,Lyon Souveniers,10134,S10_1678,41,94.74
3,Toys4GrownUps.com,10145,S10_1678,45,83.26
4,Corporate Gift Ideas Co.,10159,S10_1678,49,100.0


In [19]:
data['extended_price'] = (data.QUANTITYORDERED * data.PRICEEACH)
data.head()

Unnamed: 0,CUSTOMERNAME,ORDERNUMBER,PRODUCTCODE,QUANTITYORDERED,PRICEEACH,extended_price
0,Land of Toys Inc.,10107,S10_1678,30,95.7,2871.0
1,Reims Collectables,10121,S10_1678,34,81.35,2765.9
2,Lyon Souveniers,10134,S10_1678,41,94.74,3884.34
3,Toys4GrownUps.com,10145,S10_1678,45,83.26,3746.7
4,Corporate Gift Ideas Co.,10159,S10_1678,49,100.0,4900.0


In [20]:
data.columns = ['customer','order_number','part_number','quantity_sold','sell_price','item_total']
data.head()

Unnamed: 0,customer,order_number,part_number,quantity_sold,sell_price,item_total
0,Land of Toys Inc.,10107,S10_1678,30,95.7,2871.0
1,Reims Collectables,10121,S10_1678,34,81.35,2765.9
2,Lyon Souveniers,10134,S10_1678,41,94.74,3884.34
3,Toys4GrownUps.com,10145,S10_1678,45,83.26,3746.7
4,Corporate Gift Ideas Co.,10159,S10_1678,49,100.0,4900.0
