In [33]:
import pandas as pd
import datetime
import sqlite3
import time
import re
import calendar
import schedule

## Create the connection to SQL Lite
def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by db_file
    :param db_file: database file
    :return: Connection object or None
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        print(e)

    return conn

con = create_connection("project (1).db")

cursor = con.cursor()

res = cursor.execute("SELECT type, name FROM sqlite_master WHERE type='table'").fetchall()
all_tables = pd.DataFrame(res)
all_tables.columns = ["type","name"]
all_tables

transactions_raw_to_add = pd.read_sql("SELECT * FROM transactions", con)

transactions_clean = pd.read_sql("SELECT * FROM transactions_clean", con)

# Remove the NA columns and Remove Columns that are not needed 
new_transactions_clean_to_add = transactions_raw_to_add.dropna(how='all', axis=1)\
.drop(["Policy Level Rate", "Is Processed", "New To Medicare", "Company Business Unit Code",
       "Insured Name","CarrierCommissionID","Carrier Group","Product Code"], axis=1)

# Check the column names match from the raw transactions dataset and the cleaned dataset
print(new_transactions_clean_to_add.dtypes)
transactions_clean.dtypes

## Change the date columns in both datasets to be datetime objects

dates_col = ["Effective Date","Transaction Date","Due Date","Date Entered"]

for i in dates_col:
    new_transactions_clean_to_add[i] = pd.to_datetime(new_transactions_clean_to_add[i], format='%Y-%m-%d')
    transactions_clean[i] = pd.to_datetime(transactions_clean[i], format='%Y-%m-%d')

## Check to see if the data types were changed
print(new_transactions_clean_to_add[dates_col].dtypes)
print(transactions_clean[dates_col].dtypes)

## Create a new column in both datasets to find the year-month combination of Transaction Date
# months_in_cleaned = pd.to_datetime(transactions_clean['Transaction Date']).dt.to_period('M').drop_duplicates()
months_in_cleaned = transactions_clean['Transaction Date'].dt.strftime('%Y-%m').drop_duplicates().sort_values()

# Store the last month so I can filter it out of the cleaned transaction dataframe
last_month = months_in_cleaned[-1:]

# Remove the last month from the filtering list, since it's possible for transactions from the same month in the total transactions dataframe
months_in_cleaned = months_in_cleaned[:-1]

## Creating year-month combination of Transaction Date for full transactions dataframe
new_transactions_clean_to_add['month_year'] = new_transactions_clean_to_add['Transaction Date'].dt.strftime('%Y-%m')

## Filter out the dates that are already found in the already cleaned transactions dataset
transactions_to_clean_and_append = new_transactions_clean_to_add[~new_transactions_clean_to_add["month_year"].isin(months_in_cleaned)]

## Print the dates not in the already cleaned dataset. This dataset needs to be cleaned and appended to the already cleaned dataframe
transactions_to_clean_and_append["month_year"].unique()

# Remove all entries from the last month of transactions_clean so we can reaggregate for potenial missing data
transactions_clean['month_year'] = transactions_clean['Transaction Date'].dt.strftime('%Y-%m')
transactions_clean = transactions_clean[~transactions_clean['month_year'].isin(last_month)]

# Check to see if the last month is in the list of month years from transaction_clean
print(last_month.isin(list(set(transactions_clean["month_year"]))))

## Look at the different values in Transaction Code and edit the Transaction Code column into lowercase

# See the different values in Transaction Code and how many values there are 
print(transactions_to_clean_and_append["Transaction Code"].unique())
print(transactions_to_clean_and_append["Transaction Code"].nunique())

# Change Transaction Code into lowercase and see how many values there are
transactions_to_clean_and_append["Transaction Code"] = new_transactions_clean_to_add["Transaction Code"].str.lower()
print(transactions_to_clean_and_append["Transaction Code"].unique())
print(transactions_to_clean_and_append["Transaction Code"].nunique())

## Append the newly cleaned transactions to the already cleaned transactions
frames = [transactions_clean, transactions_to_clean_and_append]
transactions_clean_for_reporting = pd.concat(frames)

## Check the number of rows in the already cleaned dataset + new rows = number of rows from total transactions 
print(len(transactions_clean_for_reporting))
len(new_transactions_clean_to_add)

# Read in monthly transaction report template
monthly_transaction_report = pd.read_sql("SELECT * FROM monthly_transaction_report", con)

# Filter out the last month of the report
monthly_transaction_report = monthly_transaction_report[~monthly_transaction_report['month'].isin(last_month)]
# monthly_transaction_report

## Create Monthly Transaction Report for new months

# Create aggregations
monthly_transaction_report_new_months = transactions_to_clean_and_append.groupby("month_year")\
                                                     .agg({'month_year' : ['count'],
                                                           'Revenue' : ['sum', 'mean'],
                                                           'Premium' : ['sum', 'mean'],
                                                           'Commissionable Premium' : ['sum', 'mean'],
                                                           'Total Commission' : ['sum', 'mean']})

# Reset index so the months don't become row labels
monthly_transaction_report_new_months = monthly_transaction_report_new_months.reset_index()


# # Rename columns to match the template
monthly_transaction_report_new_months.columns = ['month','trx_count',
                                                 "Revenue_sum","Revenue_mean",
                                                 "Premium_sum","Premium_mean",
                                                 "Commissionable Premium_sum", "Commissionable Premium_mean", 
                                                 "Total Commission_sum", "Total Commission_mean"]

# Add new months to template
frames_monthly_txn = [monthly_transaction_report, monthly_transaction_report_new_months]
updated_monthly_txn_report = pd.concat(frames_monthly_txn)
updated_monthly_txn_report.head(3)

# Read in monthly product report template
monthly_product_report = pd.read_sql("SELECT * FROM monthly_product_report", con)

# Filter out the last month of the report
monthly_product_report = monthly_product_report[~monthly_product_report['month'].isin(last_month)]

# list(monthly_product_report["month"].drop_duplicates().sort_values())
# monthly_product_report.head(2)

## Create Monthly Product Report for new months

# Create aggregations
monthly_product_report_new_months = transactions_to_clean_and_append.groupby(["ProductName","month_year"])\
                                                     .agg({'month_year' : ['count'],
                                                           'Revenue' : ['sum', 'mean'],
                                                           'Premium' : ['sum', 'mean'],
                                                           'Commissionable Premium' : ['sum', 'mean'],
                                                           'Total Commission' : ['sum', 'mean']})
# Reset index so the months don't become row labels
monthly_product_report_new_months = monthly_product_report_new_months.reset_index()
monthly_product_report_new_months

# Rename columns to match the template
monthly_product_report_new_months.columns = ['ProductName','month','trx_count',
                                                 "Revenue_sum","Revenue_mean",
                                                 "Premium_sum","Premium_mean",
                                                 "Commissionable Premium_sum", "Commissionable Premium_mean", 
                                                 "Total Commission_sum", "Total Commission_mean"]

# monthly_product_report_new_months.head(3)

# Add new months to template
frames_prdt_txn = [monthly_product_report, monthly_product_report_new_months]
updated_prdt_txn_report = pd.concat(frames_prdt_txn).sort_values(["ProductName","month"])

# Check a subset of values
updated_prdt_txn_report[updated_prdt_txn_report["ProductName"] == "DENTAL/VISION"].head(3)

# Read in monthly agent report template
monthly_agent_report = pd.read_sql("SELECT * FROM monthly_agent_report", con)

# Filter out the last month of the report
monthly_agent_report = monthly_agent_report[~monthly_agent_report['month'].isin(last_month)]

# print(list(monthly_agent_report["month"].drop_duplicates().sort_values()))
# monthly_agent_report.sort_values("month").head(5)

## Create Monthly Agent Report for new months

# Create aggregations
monthly_agent_report_new_months = transactions_to_clean_and_append.groupby(["Agent Code","month_year"])\
                                                     .agg({'month_year' : ['count'],
                                                           'Revenue' : ['sum', 'mean'],
                                                           'Premium' : ['sum', 'mean'],
                                                           'Commissionable Premium' : ['sum', 'mean'],
                                                           'Total Commission' : ['sum', 'mean']})
# Reset index so the months don't become row labels
monthly_agent_report_new_months = monthly_agent_report_new_months.reset_index()
monthly_agent_report_new_months

# Rename columns to match the template
monthly_agent_report_new_months.columns = ['Agent Code','month','trx_count',
                                                 "Revenue_sum","Revenue_mean",
                                                 "Premium_sum","Premium_mean",
                                                 "Commissionable Premium_sum", "Commissionable Premium_mean", 
                                                 "Total Commission_sum", "Total Commission_mean"]

# monthly_agent_report_new_months.head(3)

# Add new months to template
frames_agent_txn = [monthly_agent_report, monthly_agent_report_new_months]
updated_agent_txn_report = pd.concat(frames_agent_txn).sort_values(["Agent Code","month"])

# Check a few rows
updated_agent_txn_report[updated_agent_txn_report["Agent Code"] == '3236007'].head(3)

# import datetime
# any_day = datetime.date(2020,3,1)
# print(any_day)
# today = datetime.date.today()
# print(today)

def today_date(first_of_month):
    """
    Create a function that pulls the first and last day of the previous month, and 
    create a date filter for filtering reports
    """
    if first_of_month.month == 1:
        year_interested = first_of_month.year - 1
        month_interested = 12
        day_start = 1
        day_end = calendar.monthrange(year_interested,month_interested)[1]
        
        date_start = datetime.date(year_interested,month_interested,day_start)
        date_end = datetime.date(year_interested,month_interested,day_end)
    else:
        year_interested = first_of_month.year
        month_interested = (first_of_month.month - 1)
        day_start = 1
        day_end = calendar.monthrange(year_interested,month_interested)[1]

        date_start = datetime.date(year_interested,month_interested,day_start)
        date_end = datetime.date(year_interested,month_interested,day_end)
    
    # If the month length is <2, then add a leading 0 to month
    if len(str(date_start.month)) > 1:
        date_filter = str(date_start.year) + '-' + str(date_start.month)
    else:
        date_filter = str(date_start.year) + '-' + '0' + str(date_start.month)
    
    return date_start, date_end, date_filter

# Using function
fn_return = today_date(datetime.date(2020,2,1))
print(fn_return)

# Create a list of all the months to run
months_to_run = [datetime.date(2020,1,1),
                 datetime.date(2020,2,1),
                 datetime.date(2020,3,1),
                 datetime.date(2020,4,1),
                 datetime.date(2020,5,1),
                 datetime.date(2020,6,1),
                 datetime.date(2020,7,1)]

def txn_report_generator():
    starting_report_txn = monthly_transaction_report
    for i in (months_to_run):
        fn_return = today_date(i)
        date_filter = fn_return[2]
        
        monthly_txn_report_additional_month = updated_monthly_txn_report[updated_monthly_txn_report["month"] == date_filter]
        month_txn = [starting_report_txn, monthly_txn_report_additional_month]
        starting_report_txn = pd.concat(month_txn).sort_values(["month"])        
        
        print(date_filter)
#     return starting_report.to_csv('mtr.csv')

    starting_report_prod = monthly_product_report
    for i in (months_to_run):
        fn_return = today_date(i)
        date_filter = fn_return[2]
        
        monthly_prod_report_additional_month = updated_prdt_txn_report[updated_prdt_txn_report["month"] == date_filter]
        month_txn = [starting_report_prod, monthly_prod_report_additional_month]
        starting_report_prod = pd.concat(month_txn).sort_values(["month"])        
        
        print(date_filter)
#     return starting_report.to_csv('ptr.csv')

    starting_report_agent = monthly_agent_report
    for i in (months_to_run):
        fn_return = today_date(i)
        date_filter = fn_return[2]
        
        monthly_agent_report_additional_month = updated_agent_txn_report[updated_agent_txn_report["month"] == date_filter]
        month_txn = [starting_report_agent, monthly_agent_report_additional_month]
        starting_report_agent = pd.concat(month_txn).sort_values(["month"])        
        
        print(date_filter)
    return starting_report_txn.to_csv('mtr.csv'), starting_report_prod.to_csv('ptr.csv'), starting_report_agent.to_csv('etr.csv')

schedule.every(15).seconds.do(txn_report_generator)

while True:
    schedule.run_pending()
    time.sleep(1)

Carrier                             object
Policy #                            object
Effective Date                      object
Transaction Date                    object
Due Date                            object
Premium                            float64
Commissionable Premium             float64
Revenue                            float64
Transaction Code                    object
Policy Year                         object
State                               object
ProductName                         object
Agent Code                          object
Product Type Name                   object
Total Commission                   float64
Insert Date                         object
Date Entered                        object
Carrier Transaction Description     object
dtype: object
Effective Date      datetime64[ns]
Transaction Date    datetime64[ns]
Due Date            datetime64[ns]
Date Entered        datetime64[ns]
dtype: object
Effective Date      datetime64[ns]
Transaction Date    dat

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  transactions_to_clean_and_append["Transaction Code"] = new_transactions_clean_to_add["Transaction Code"].str.lower()


['override' 'comm override' 'chargeback' 'adjustment' 'commission'
 'policy fee' 'rr']
7
712127
(datetime.date(2020, 1, 1), datetime.date(2020, 1, 31), '2020-01')
2019-12
2020-01
2020-02
2020-03
2020-04
2020-05
2020-06
2019-12
2020-01
2020-02
2020-03
2020-04
2020-05
2020-06
2019-12
2020-01
2020-02
2020-03
2020-04
2020-05
2020-06
2019-12
2020-01
2020-02
2020-03
2020-04
2020-05
2020-06
2019-12
2020-01
2020-02
2020-03
2020-04
2020-05
2020-06
2019-12
2020-01
2020-02
2020-03
2020-04
2020-05
2020-06


KeyboardInterrupt: 

 
### Stretch: Task V

Complete Tasks I, II, III using SQL only. Task IV remains the same in this Stretch Task.
