In [1]:
#Import Package Dependencies
from DataAnalytics import DataAnalytics
import warnings
import pandas as pd
import numpy as np
import pickle
import pyodbc
#import openpyxl

warnings.filterwarnings("ignore")

In [2]:
# Create New Project
#  
# Creates a new instance of the DataAnalytics Package and links it to variable wd.
# Variable wd will allow the use of all methods in DataAnalytics package.
wd = DataAnalytics()


In [3]:
# Import Data: Delimited Files
#
# The example below imports data from a series of flat files.
# Files are located in a folder called 'data' in the same directory, 
# have the extension .asc 
# and are delimited with a semicolon (;)

source = 'file' 
folder = 'data\\'
tables = ['trans.asc','account.asc','client.asc','district.asc']

# Ex: wd.importFile('file.trans.asc','data\\trans.asc',';')
# 
# Import flat file 
# at location 'data\\trans.asc',
# use separator ';'
# and give the imported table the name 'file.trans.asc'
# 
# A for loop is set up below to import multiple files found in the same folder and with similar details.

for tbl in tables:
    
    wd.importFile(folder + tbl, ';',tblName = source + '.' + tbl)

In [4]:
# Import Data: SQL
#
# The example below imports data from a SQL data source.
# A table is being imported from a SQL Server database.
# The data connection is first defined by specifying the odbc driver, server, database and UserID.
# The password is not required for Windows Authenticated User logon.

# NOTE: The list of available ODBC drivers can be seen by running wd.available_drivers() .
#       If the required driver is not installed, please install and run the above command 
#       to ensure it is available.

driver = 'SQL Server'
server = 'BYTE'
db = 'AnalyticsData'
UID = 'BYTE\Rory Barrett'

# For convenience, wd.sqlCxn(...) returns a formatted connection string for the provided details. 
# The output can be read with a print statement. 
# Include pw = ... if password is required. wd.sqlCxn(driver,server,db,UID,pw='password')

# Ex: wd.importSQL(wd.sqlCxn(driver,server,db,UID),'[dbo].[account]',tblName = 'acc_x')
#
# Import data from SQL using connection string wd.sqlCxn(..), 
# table '[dbo].[account]' 
# and give the imported table the name 'acc_x' 

wd.importSQL(wd.sqlCxn(driver,server,db,UID),'[dbo].[account]',tblName = 'acc_x')

In [8]:
# Import Data: Excel Files
#
# The example below imports data from an excel file.
# File is located in a folder called 'data' in the same directory, 

folder = 'data\\'

# Ex: wd.importExcel('data\\teamsheet.xlsx')
# 
# Import XLSX file 
# at location 'data\\teamsheet.xlsx'

wd.importExcel(folder + 'teamsheet.xlsx')

# Ex: wd.importExcel('data\\teamsheet.xlsx', sheet='Names', tblName='Teamsheet_Names')
# 
# Import XLSX file 
# at location 'data\\teamsheet.xlsx', 
# use sheet 'Names' 
# and give the imported table the name 'Teamsheet_Names'
wd.importExcel(folder + 'teamsheet.xlsx', sheet='Names', tblName='Teamsheet_Names')

In [9]:
# Explore Directory 
# 
# All data imported above become part of the working directory of tables. 
# To list all tables in the directory run wd.explore()

# NOTE: Run wd.explore() to observe the results of the previous imports. 
#       When importing any data an optional argument called 'tblName' can 
#       be used to specify the name of the table once imported. (See previous examples) 
#       It is recommended to use this for all imports, if no tblName is specified the filename is used.
#       Most filenames need to be entered as the location of the file and because of this you may result 
#       in a tblName such as data\teamsheet.xlsx as seen below. 

wd.explore()

Unnamed: 0,Table Name
0,file.trans.asc
1,file.account.asc
2,file.client.asc
3,file.district.asc
4,acc_x
5,data\teamsheet.xlsx
6,Teamsheet_Names


In [12]:
# Working Directory
#
# The command wd.wd() can be used to see the directory of this project. 
# This may be helpful when referrring to the directory/folder.
wd.wd()

'c:\\Users\\Rory Barrett\\Labs\\data-analytics-framework\\data-analytics-framework'

In [None]:
## ----- End of Formal Documentation ----- 
# DISCLAIMER: Any items below may not be fully functional or have not been formally documented.

In [None]:
# Extract and Filter
wd.open("file.trans.asc")
cnd = "trans_id == 695247"
wd.extract("sample_trans",condition=cnd)

wd.open("file.district.asc")
wd.filter("A3 == 'north Moravia'")
wd.extract("northern Moravia", "A3 == 'north Moravia'")

# Explore
wd.explore()

# Exports
wd.export(wd.csv,filename='Reports\myCSV')
wd.export(wd.mdb,filename='Reports\myMDB')

InterfaceError: ('IM002', '[IM002] [Microsoft][ODBC Driver Manager] Data source name not found and no default driver specified (0) (SQLDriverConnect)')

In [None]:
wd.open("northern Moravia")

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
66,67,Bruntal,north Moravia,106054,38,25,6,2,6,63.1,8110,5.77,6.55,109,3244,3079
67,68,Frydek - Mistek,north Moravia,228848,15,40,18,2,6,57.2,9893,4.09,4.72,96,5623,5887
68,69,Jesenik,north Moravia,42821,4,13,5,1,3,48.4,8173,?,7.01,124,?,1358
69,70,Karvina,north Moravia,285387,0,2,8,5,7,89.9,10177,6.63,7.75,81,9878,10108
70,71,Novy Jicin,north Moravia,161227,5,35,11,4,10,69.7,8678,5.93,5.57,102,4980,4595
71,72,Olomouc,north Moravia,226122,32,50,7,4,4,62.6,8994,3.80,4.79,110,9672,9208
72,73,Opava,north Moravia,182027,17,49,12,2,7,56.4,8746,3.33,3.74,90,4355,4433
73,74,Ostrava - mesto,north Moravia,323870,0,0,0,1,1,100.0,10673,4.75,5.44,100,18782,18347
74,75,Prerov,north Moravia,138032,67,30,4,2,5,64.6,8819,5.38,5.66,99,4063,4505
75,76,Sumperk,north Moravia,127369,31,32,13,2,7,51.2,8369,4.73,5.88,107,3736,2807


In [None]:
wd.context.groupby(['A2'], as_index=False).agg(['count','sum','mean'])

Unnamed: 0_level_0,A1,A1,A1,A4,A4,A4,A5,A5,A5,A6,...,A11,A13,A13,A13,A14,A14,A14,A16,A16,A16
Unnamed: 0_level_1,count,sum,mean,count,sum,mean,count,sum,mean,count,...,mean,count,sum,mean,count,sum,mean,count,sum,mean
A2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Bruntal,1,67,67,1,106054,106054,1,38,38,1,...,8110,1,6.55,6.55,1,109,109,1,3079,3079
Frydek - Mistek,1,68,68,1,228848,228848,1,15,15,1,...,9893,1,4.72,4.72,1,96,96,1,5887,5887
Jesenik,1,69,69,1,42821,42821,1,4,4,1,...,8173,1,7.01,7.01,1,124,124,1,1358,1358
Karvina,1,70,70,1,285387,285387,1,0,0,1,...,10177,1,7.75,7.75,1,81,81,1,10108,10108
Novy Jicin,1,71,71,1,161227,161227,1,5,5,1,...,8678,1,5.57,5.57,1,102,102,1,4595,4595
Olomouc,1,72,72,1,226122,226122,1,32,32,1,...,8994,1,4.79,4.79,1,110,110,1,9208,9208
Opava,1,73,73,1,182027,182027,1,17,17,1,...,8746,1,3.74,3.74,1,90,90,1,4433,4433
Ostrava - mesto,1,74,74,1,323870,323870,1,0,0,1,...,10673,1,5.44,5.44,1,100,100,1,18347,18347
Prerov,1,75,75,1,138032,138032,1,67,67,1,...,8819,1,5.66,5.66,1,99,99,1,4505,4505
Sumperk,1,76,76,1,127369,127369,1,31,31,1,...,8369,1,5.88,5.88,1,107,107,1,2807,2807


In [None]:
wd.summBy(['A2'])

Unnamed: 0,A2,size
0,Bruntal,1
1,Frydek - Mistek,1
2,Jesenik,1
3,Karvina,1
4,Novy Jicin,1
5,Olomouc,1
6,Opava,1
7,Ostrava - mesto,1
8,Prerov,1
9,Sumperk,1


In [None]:
wd.context.columns = ["_".join(x) for x in wd.context.columns.ravel()]

In [None]:
not None

True

In [None]:
wd.addCol('Summers','A5 + A6')
wd.addCol('Winters','A5 * A6')
wd.addCol('Final', '"Summer was " + Summers')

UndefinedVariableError: name 'A5' is not defined

In [None]:
for tbl in wd.db:
    print(wd.db[tbl].memory_usage(index=True, deep=True).sum()/1000000.00)

270.996445
0.436334
0.128984
0.027385
0.000245
0.004466
0.001303


In [None]:
dbfile = open('db', 'ab')
pickle.dump(db, dbfile)                      
dbfile.close() 

NameError: name 'db' is not defined