In [1]:
# TODO: Total Paid, Total Due - be aware of the time frame
# TODO: 
import csv
import re
import os
import pandas as pd
import numpy as np
import xlsxwriter
import shutil

from datetime import datetime, date

In [2]:
file_root = './files'
church_export_root = './church'
event_export_root = './events'

pricing_breakdown = {
    '10-27-22': 75,
    '11-14-22': 90,
    '11-18-22': 125
}

event_exclusion_data_cols = [
    'Submission Date',
    'Form Submission Date',
    'I am registering as a',
    'Are you Participating or Spectating?',
    'District',
    'Birthday',
    'Street Address',
    'Street Address Line 2',
    'City',
    'State / Province',
    'Postal / Zip Code',
    'Cell Phone',
    'Your Email',
    'Gender',
    'Shirt Size',
    'First Name.1',
    'Last Name.1',
    'What is your youth leader/pastor\'s email?',
    'First Name.2',
    'Last Name.2',
    'Parent/Guardian Email',
    'Parent Cell Phone',
    'Please list any medical problems',
    'Please list any allergies',
    'Please list past surgeries',
    'Please list any medications and dosage you will be taking',
    'First Name.3',
    'Last Name.3',
    'Home Phone',
    'Work Phone',
    'Contact Phone',
    'Insurance Company',
    'Policy Number #',
    'Do you plan on attending TNT@TNU April 13th-April 16th?',
    'Payment',
    'Intenal Notes'
]

write_file_exlusion = ['First Name', 'Last Name', 'Grade Level', 'Age Level Individual Sport', 'Age Level', 'What church are you a part of?']

event_categories = [
    'Art',
    'Creative Ministries',
    'Creative Writing',
    'Speech',
    'Academics',
    'Vocal Music',
    'Instrumental Music',
    'Individual Sports',
    'Team Sports',
    'Quizzing'
]

church_list = [
    "Ashland City Faith Community",
    "Bell Road",
    "Bent Knee Cowboy",
    "Body of Christ",
    "Bread of Life Community",
    "Bridge Point",
    "Brookhaven First",
    "Clarksdale",
    "Clarksville First Community of Hope",
    "Clarksville Grace",
    "Clarksville Hope Riders Biker Church",
    "Cleveland First",
    "Columbia First",
    "Columbia Grace",
    "Columbus First",
    "Community Fellowship",
    "Concord Community",
    "Covington",
    "Crossroads",
    "Crossroads Community",
    "Crystal Springs",
    "DaySpring Community",
    "Dickson Lighthouse",
    "Donelson",
    "Dover",
    "Elmore",
    "Erin",
    "Fellowship",
    "Fly",
    "Foundry",
    "Franklin Community Faith",
    "Fulton First",
    "Gallatin First",
    "Gateway Community",
    "Glencliff Hispanic",
    "Gloster",
    "Goodlettsville",
    "Gray's Chapel",
    "Grenada First",
    "Griffin's Chapel",
    "Gulf Coast Family",
    "Harmony",
    "Hattiesburg First",
    "Hazkehurst",
    "Hendersonville",
    "Hermitage",
    "Highland",
    "Hillside Community",
    "Igelsia Del Nazareno Eben-Ezer",
    "Igelsia Hispana Betel de Gallatin",
    "Jason Chapel",
    "Joelton",
    "Jones Chapel",
    "Kingston Springs",
    "Laurel Bresee",
    "Laurel First",
    "Lewisburg First",
    "Libertad en Cristo",
    "Lifepointe Community",
    "Long Creek",
    "Madison",
    "Madison Hispanic",
    "Magnolia",
    "Memphis Calvary",
    "Memphis Eastside",
    "Memphis Emmanuel",
    "Memphis Friendship",
    "Memphis Grace",
    "Memphis Holiness Tabernacle",
    "Memphis Hope Tabernacle",
    "Memphis Refreshing Springs",
    "Meridian Fitkins Memorial",
    "Meridian Northside",
    "Millington",
    "Mount Wesley-Akin Chapel",
    "Nashville Blakemore",
    "Nashville Christ",
    "Nashville First (Main Campus)",
    "Nashville First (Bethel Campus)",
    "Nashville First (Calvary) Korean",
    "Nashville Grace",
    "Nashville Inglewood",
    "Nashville Kaleo",
    "Nashville Trinity",
    "New Albany First",
    "New Vision",
    "Old Hickory",
    "Oxford",
    "Palabra de Vida",
    "Paris",
    "Park Lane",
    "Pearson Chapel",
    "Pine Hill",
    "Pleasant Point",
    "Portland",
    "Prentiss",
    "Quitman",
    "Ripley Hispanic",
    "Rosebloom",
    "Savannah",
    "Sentobia N.O. Nabors Memorial",
    "Spring Hill Restoration Community",
    "Springfield First",
    "Starkville Wesley Community",
    "Trevecca Community",
    "Union",
    "Vicksburg First/Vicksburg Hispanic",
    "Victory Praise and Worship Center",
    "Waverly",
    "Way of the Cross",
    "Waynesboro Trinity",
    "Westwind Community of Faith",
    "Woodlawn Community"
]

In [3]:
files = os.listdir(file_root)
files = [f for f in files if os.path.isfile(f'{file_root}/{f}') and f != '.DS_Store'] 
files

['Momentum_Registration_-_20222022-11-18_11_19_03.csv']

In [4]:
def create_directory(process):
    outdir = f'./{process}/{date.today()}/'
    
    if not os.path.exists(outdir):
        os.mkdir(outdir)

In [5]:
def write_csv(df, df_name, process, filename):
    create_directory(process)
    if df_name not in write_file_exlusion:
        df.to_csv(f'{filename}.csv')

In [6]:
def write_excel(df, df_name, process, filename, indexed=False):
    create_directory(process)
    if df_name not in write_file_exlusion:
        df.to_excel(f'{filename}.xlsx', index=indexed, engine='xlsxwriter')

In [7]:
def move_to_processed(filename):
    src_file = f'{file_root}/{filename}'
    dst_file = f'./processed/{filename}'
    
    shutil.move(src_file, dst_file)

In [8]:
def strip_filename_from_download_date(filename):
    filename_parts = filename.split('_-_')
    return filename_parts[0]

strip_filename_from_download_date(files[0])

'Momentum_Registration'

In [9]:
def get_price(row):
    submission_date_string = row['Form Submission Date']
    price_dates = pricing_breakdown.keys()
    if row['I am registering as a'] == 'Adult':
        return 35

    if row['Are you Participating or Spectating?'] == 'Spectator (non-competing participant)':
        return 50
    
    for price_date_string in price_dates:
        submission_date = datetime.strptime(submission_date_string, '%b %d, %Y')
        price_date = datetime.strptime(price_date_string, '%m-%d-%y')
        if submission_date <= price_date:
            return pricing_breakdown[price_date_string]

In [10]:
def create_student_categories(file, filename):
   df_original = pd.read_csv(f'{file_root}/{file}')

   cols = df_original.columns;
    
   category_cols = [];
   for col in cols:
      if col not in event_exclusion_data_cols:
         category_cols.append(col)
    
   for category_col in category_cols:
      loc_cols = ['First Name', 'Last Name', 'What church are you a part of?']
      group_by_cols = [category_col]
      if 'sport' in category_col.lower():
         loc_cols.append('Age Level Individual Sport')
      else:
         loc_cols.append('Age Level')

      if category_col == 'Individual Sports':
         group_by_cols.append('Age Level Individual Sport')
      if category_col == 'Team Sports':
         group_by_cols.append('What church are you a part of?')
      if 'Music' in category_col:
         group_by_cols.append('What church are you a part of?')

      df = df_original.copy(deep=True)
      df = df.loc[:, [*loc_cols, category_col]].dropna(subset=[category_col])

      if category_col in event_categories:
         df[category_col] = df.loc[:, category_col].apply(lambda x: x.split('\n'))
         df_dedeup = df.drop_duplicates(subset=loc_cols)
         df_indexed = df_dedeup.explode(category_col).groupby([*group_by_cols, 'First Name', 'Last Name', 'What church are you a part of?']).sum()
         # display(df_indexed)
         
         export_path = f"{event_export_root}/{date.today()}/{category_col.replace(' ', '_').lower()}"
         write_excel(df_indexed, category_col, 'events', export_path, indexed=True)

# create_student_categories(files[0],strip_filename_from_download_date(files[0]))


In [11]:
def create_church_info_sheets(file, filename):
    df_original = pd.read_csv(f'{file_root}/{file}')

    df = df_original.copy(deep=True)
    cols = df_original.columns;
    
    category_cols = [];
    for col in cols:
        if col not in event_exclusion_data_cols:
            category_cols.append(col)
    
    for category_col in category_cols:
        if category_col in event_categories:
            df[category_col] = df.loc[:, category_col].str.split('\n')

    cols = ['First Name', 'Last Name', 'Grade Level', 'What church are you a part of?', 'I am registering as a', 'Are you Participating or Spectating?', 'Form Submission Date', *event_categories, 'First Name.1', 'Last Name.1', 'What is your youth leader/pastor\'s email?', 'Payment']
    df = df.loc[:, cols].fillna('')
    df = df.groupby(['What church are you a part of?', 'First Name', 'Last Name'], group_keys=False).apply(lambda x: x)
    # df.drop_duplicates(subset=['First Name', 'Last Name', 'Grade Level'], inplace=True)
    df.sort_values('Last Name')
    df.reset_index(inplace=True)

    for church in church_list:
        df_church = df.loc[df['What church are you a part of?'] == church]

        if not df_church.empty:
            df_church['Price'] = df_church.apply(lambda x: get_price(x), axis=1)
            df_church['Paid'] = df_church['Payment'].apply(lambda x: int(re.findall("[0-9]+", x)[0]) if re.findall("[0-9]+", x) else 0)
            df_church['Total Due'] = df_church.apply(lambda x: x['Price'] - x['Paid'], axis=1)
            df_church.rename(columns = {"First Name.1": "Youth Leader First Name", "Last Name.1": "Youth Leader Last Name"}, inplace = True)
            df_church.loc['Total',:] = df_church.sum(axis=0, numeric_only=True)
            
            # display(df_church)
            export_path = f"{church_export_root}/{date.today()}/{church.replace(' ', '_').lower()}"
            write_excel(df_church, church, 'church', export_path)


# create_church_info_sheets(files[0], strip_filename_from_download_date(files[0]))

In [12]:
for file in files:
    filename = strip_filename_from_download_date(file)

    create_student_categories(file, filename)
    create_church_info_sheets(file, filename)
    move_to_processed(file)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_church['Price'] = df_church.apply(lambda x: get_price(x), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_church['Paid'] = df_church['Payment'].apply(lambda x: int(re.findall("[0-9]+", x)[0]) if re.findall("[0-9]+", x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_church['T