In [1]:
#Import Package Dependencies
import os
import pickle
import warnings

import pandas as pd
import pyodbc

from DataAnalytics import DataAnalytics

# Install a blanket "ignore" filter: no category is given, so every warning is silenced
# for the rest of the session.
# NOTE(review): this also hides deprecation notices raised by the imported libraries —
#               consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [2]:
# Create New Project
#
# Creates a new instance of the DataAnalytics package and binds it to the variable wd.
# The variable wd gives access to all methods in the DataAnalytics package.
#
# July 2021 Update
#
# All dataframes will have a copy stored to disk using the .das file extension when created.
#
# NOTE: If .das files are detected in the current working folder
#       when a new instance of DataAnalytics is created, those dataframes
#       will be loaded into the new instance.

wd = DataAnalytics()


In [3]:
# Import Data: Delimited Files
#
# The example below imports data from a group of flat files.
# Files are located in a folder called 'data' in the same directory, 
# have the extension .asc 
# and are delimited with a semicolon (;)

source = 'file'
folder = 'data\\'
tables = ['trans.asc', 'account.asc', 'client.asc', 'district.asc']

# Ex: wd.importFile('data\\trans.asc', ';', tblName='file.trans.asc')
#
# Import the flat file
# at location 'data\\trans.asc',
# use separator ';'
# and give the imported table the name 'file.trans.asc'.
#
# The loop below issues that same call once per entry in `tables`;
# every file shares the folder and the separator, and each table is named '<source>.<file name>'.

for tbl in tables:
    wd.importFile(f'{folder}{tbl}', ';', tblName=f'{source}.{tbl}')

In [4]:
# Import Data: SQL
#
# The example below imports data from a SQL data source.
# A table is being imported from a SQL Server database.
# The data connection is first defined by specifying the odbc driver, server, database and UserID.
# The password is not required for Windows Authenticated User logon.

# NOTE: The list of available ODBC drivers can be seen by running wd.available_drivers() .
#       If the required driver is not installed, please install and run the above command 
#       to ensure it is available.

# Connection details passed to wd.sqlCxn(...) when importing from SQL Server.
driver = 'SQL Server'      # ODBC driver name
server = 'BYTE'            # server to connect to
db = 'AnalyticsData'       # database holding the source tables
# The backslash separating domain and user is doubled so the literal does not rely on
# the invalid escape sequence '\R' (a DeprecationWarning today, a SyntaxWarning on
# Python 3.12+). The resulting string value is unchanged.
UID = 'BYTE\\Rory Barrett'

# For convenience, wd.sqlCxn(...) returns a formatted connection string for the provided details. 
# The output can be read with a print statement. 
# Include pw = ... if password is required. wd.sqlCxn(driver,server,db,UID,pw='password')

# Ex: wd.importSQL(wd.sqlCxn(driver,server,db,UID),'[dbo].[account]',tblName = 'acc_x')
#
# Import data from SQL using connection string wd.sqlCxn(..), 
# table '[dbo].[account]' 
# and give the imported table the name 'acc_x' 

# Build the connection string as its own step, then import table '[dbo].[account]'
# and register it under the name 'acc_x'.
cxn_string = wd.sqlCxn(driver, server, db, UID)
wd.importSQL(cxn_string, '[dbo].[account]', tblName='acc_x')

In [5]:
# Import Data: Excel Files
#
# The example below imports data from an excel file.
# File is located in a folder called 'data' in the same directory, 

folder = 'data\\'
workbook = folder + 'teamsheet.xlsx'   # i.e. 'data\\teamsheet.xlsx'

# Ex: wd.importExcel('data\\teamsheet.xlsx')
#
# Import the XLSX file
# at location 'data\\teamsheet.xlsx'.
# No sheet or tblName is supplied here.

wd.importExcel(workbook)

# Ex: wd.importExcel('data\\teamsheet.xlsx', sheet='Names', tblName='Teamsheet_Names')
#
# Import the same XLSX file,
# this time reading sheet 'Names'
# and naming the imported table 'Teamsheet_Names'.
wd.importExcel(workbook, sheet='Names', tblName='Teamsheet_Names')

In [6]:
# Explore Directory
#
# All data imported above becomes part of the working directory of tables.
# To list all tables in the directory run wd.explore().

# NOTE: Run wd.explore() to observe the results of the previous imports.
#       When importing any data, an optional argument called 'tblName' can
#       be used to specify the name of the table once imported. (See previous examples.)
#       It is recommended to use this for all imports; if no tblName is specified the filename is used.
#       Most filenames have to be entered as the location of the file, so without a tblName you may
#       end up with a table name such as data\teamsheet.xlsx, as seen below.

wd.explore()

Unnamed: 0,Table Name
0,acc_x
1,file.account.asc
2,file.client.asc
3,file.district.asc
4,file.trans.asc
5,Teamsheet_Names
6,tmp.account.asc
7,tmp.client.asc
8,tmp.district.asc
9,tmp.trans.asc


In [None]:
# Working Directory
#
# The command wd.wd() can be used to see the directory of this project.
# This may be helpful when referring to the directory/folder.
wd.wd()

In [None]:
## ----- End of Formal Documentation ----- 
# DISCLAIMER: Any items below may not be fully functional or have not been formally documented.

In [None]:
# Extract and Filter
#
# Open the imported transactions table and extract the rows matching `cnd`
# under the name 'sample_trans'.
wd.open("file.trans.asc")
cnd = "trans_id == 695247"
wd.extract("sample_trans",condition=cnd)

# Open the imported district table, filter it on A3 and extract the same condition
# under the name 'northern Moravia' (that name is opened again in the next cell).
# NOTE(review): filter and extract are given the identical condition; whether the
#               filter call is still needed is not visible from here — confirm.
wd.open("file.district.asc")
wd.filter("A3 == 'north Moravia'")
wd.extract("northern Moravia", "A3 == 'north Moravia'")

# Explore
wd.explore()

# Exports
# The path separator is written as a doubled backslash, matching the 'data\\' paths used
# for the imports. The previous single backslash relied on the invalid escape sequence
# '\m' (DeprecationWarning, SyntaxWarning on Python 3.12+); the filename values are unchanged.
wd.export(wd.csv,filename='Reports\\myCSV')
wd.export(wd.mdb,filename='Reports\\myMDB')

In [None]:
# Open the table named "northern Moravia" — the name passed to wd.extract(...) in the
# Extract and Filter cell.
# NOTE(review): presumably this makes it the table behind wd.context for the cells that
#               follow — confirm.
wd.open("northern Moravia")

In [None]:
# Group wd.context by column 'A2' and aggregate with count, sum and mean.
# The result is only displayed as the cell output; it is not assigned anywhere.
(
    wd.context
    .groupby(['A2'], as_index=False)
    .agg(['count', 'sum', 'mean'])
)

In [None]:
# Call the package's summBy helper with the column list ['A2'].
# NOTE(review): presumably the packaged counterpart of the manual groupby/agg on
#               wd.context in the previous cell — confirm against the DataAnalytics source.
wd.summBy(['A2'])

In [None]:
# Flatten the column labels of wd.context: the parts of each label are joined with "_"
# (a two-level label such as ('A4', 'sum') becomes 'A4_sum').
# The columns are iterated directly rather than through Index.ravel(), which recent
# pandas releases deprecate; iterating a MultiIndex yields the same label tuples.
# NOTE(review): the groupby/agg cell above only displays its result, so wd.context has
#               multi-level columns only if something else stored such a result — confirm.
#               For plain string labels the join is applied character by character.
wd.context.columns = ["_".join(x) for x in wd.context.columns]

In [None]:
# Scratch expression: `not None` evaluates to True. The value is not assigned to anything.
not None

In [None]:
# Add three columns; each call passes a new column name and an expression string.
# NOTE(review): presumably the columns are added to the currently open table and A5/A6
#               are existing column names — confirm.
wd.addCol('Summers','A5 + A6')
wd.addCol('Winters','A5 * A6')
# NOTE(review): 'Final' concatenates a string literal with Summers, which is produced by
#               the arithmetic above; if Summers is numeric this may need a string cast — confirm.
wd.addCol('Final', '"Summer was " + Summers')

In [None]:
# Print the memory footprint of every table held in wd.db, one value per table,
# expressed in millions of bytes. The index is included and deep=True is requested.
# NOTE(review): assumes each wd.db entry is a pandas DataFrame — confirm.
for tbl in wd.db:
    footprint_bytes = wd.db[tbl].memory_usage(index=True, deep=True).sum()
    print(footprint_bytes / 1000000.00)

In [None]:
# Pickle `db` into a file named 'db' in the current working directory.
# Relies on `import pickle` in the notebook's import cell.
# Mode 'ab' appends, so every run adds another pickle record instead of overwriting the file.
# The `with` block closes the file even if pickle.dump raises.
# NOTE(review): on a fresh run `db` is the database-name string assigned in the SQL import
#               cell ('AnalyticsData'), not the table store `wd.db` used in the previous
#               cell — confirm which object was meant to be saved.
with open('db', 'ab') as dbfile:
    pickle.dump(db, dbfile)