In [13]:
import pandas as pd
from gspread_pandas import Spread, Client
import random
import numpy as np
from time import localtime, strftime
import gspread
from gspread_formatting import *

In [14]:
# Authenticate with Google Sheets through gspread's OAuth flow.
# `gc` is the gspread client the later cells use to open spreadsheets by name.
# NOTE(review): the name shadows the standard-library `gc` module should it ever be imported.
gc = gspread.oauth()

In [18]:
# Collect the run parameters from the user: the input spreadsheet names, the output
# spreadsheet name, and the two sampling sizes. (The output spreadsheet itself is opened
# in a later cell.)
# Names are comma-separated; whitespace around each name is trimmed and blank entries are
# dropped, so "a, b" or a trailing comma does not produce a name that cannot be opened.
input_spreadsheet_names = [
    name.strip()
    for name in input("Enter input spreadsheet names (sep by commas): ").split(",")
    if name.strip()
]
output_sheet_name = input("Enter name of output spreadsheet: ")
# Both counts must be typed as integers; int() raises ValueError otherwise.
entries_per_sample = int(input("Enter the number of samples to be taken per input sheet: "))
num_samples = int(input("Enter the number of times to randomly select the number of samples: "))

In [20]:
# gspread_pandas handle on the output spreadsheet. Despite its name this is a Spread
# object, not a string; the helpers below use it to write cells and DataFrames.
output_spreadsheet_name = Spread(output_sheet_name)
# gspread handle on the same spreadsheet, used for worksheet look-ups (formatting,
# column resizing) and for its id when printing the result link.
output_gspread = gc.open(output_sheet_name)

# Cell format applied to the stamp cells (A1:C1): centred, non-bold black text on a
# light red-tinted background.
stamp_format = cellFormat(
    horizontalAlignment='CENTER',
    textFormat=textFormat(foregroundColor=color(0, 0, 0), bold=False),
    backgroundColor=color(1, 0.9, 0.9),
)

def stamp_output(output_tab_name,url):
    """Write the provenance stamp into A1:C1 of one output tab and format it.

    The stamp holds the generator label, the current local time and a HYPERLINK
    formula pointing back to ``url``.

    Args:
        output_tab_name: Name of the tab in the output spreadsheet to stamp.
        url: Address of the source spreadsheet, embedded in the HYPERLINK formula.
    """
    current_time = strftime("%Y-%m-%d %H:%M:%S", localtime())
    hyperlink = '=HYPERLINK("'+url+'","Link to Original")'
    # Name the target tab explicitly. Without `sheet=` the cells go to whichever tab
    # the Spread object happened to open last, which is only right by coincidence.
    output_spreadsheet_name.update_cells(
        "A1",
        "C1",
        ["Generated by: spencer@casetext.com", current_time, hyperlink],
        sheet=output_tab_name,
    )
    format_cell_range(output_gspread.worksheet(output_tab_name), 'A1:C1', stamp_format)

# Cell format for the column-header row: bold text, nothing else overridden.
header_format = cellFormat(textFormat=textFormat(bold=True))

def format_header(output_sheet_name):
    """Apply ``header_format`` to row 2 of the given tab of the output spreadsheet.

    Args:
        output_sheet_name: Name of the tab (worksheet) whose row 2 is formatted.
    """
    header_tab = output_gspread.worksheet(output_sheet_name)
    header_row = '2'  # range string naming the whole of row 2
    format_cell_range(header_tab, header_row, header_format)

def format_worksheet(output_sheet_name):
    """Auto-resize every column of the given tab of the output spreadsheet.

    Args:
        output_sheet_name: Name of the tab (worksheet) to resize.
    """
    # Look the worksheet up once; each look-up is a separate call on the spreadsheet.
    worksheet = output_gspread.worksheet(output_sheet_name)
    worksheet.columns_auto_resize(0, worksheet.col_count)

In [25]:
def create_input_sheet_dict(sheet_names):
    """Load every named input spreadsheet into a dict keyed by a short name.

    Args:
        sheet_names: Iterable of spreadsheet names to open.

    Returns:
        dict mapping the first seven characters of each spreadsheet name to a dict
        with keys ``"df"`` (the sheet as a DataFrame), ``"url"`` (link to the
        spreadsheet) and ``"batch"`` (the seventh character of the name, or ``""``
        when the name is shorter than seven characters).
    """
    print(sheet_names)
    inputs_dict = {}
    for sheet_name in sheet_names:
        key_name = sheet_name[0:7]
        if key_name in inputs_dict:
            # Keys are only a 7-character prefix, so two names can collide. The later
            # sheet still wins (as before), but the replacement is no longer silent.
            print(f"Warning: key {key_name!r} already loaded; {sheet_name!r} replaces the earlier sheet.")
        sheet_gspread = gc.open(sheet_name)
        spreadsheet_url = "https://docs.google.com/spreadsheets/d/%s" % sheet_gspread.id
        inputs_dict[key_name] = {
            "df": Spread(sheet_name).sheet_to_df(),
            "url": spreadsheet_url,
            # Slice instead of indexing so a short name gives "" rather than IndexError.
            "batch": key_name[6:7],
        }
    return inputs_dict
    
def create_output(inputs_data_dict, output_name, entries_per_sample, num_samples):
    """Upload random samples of each input sheet to tabs of the output spreadsheet.

    For every sampling round and every input sheet, up to ``entries_per_sample``
    distinct non-blank index labels are drawn at random; all rows carrying those
    labels are written to a tab starting at A2, then the tab is stamped, its header
    row formatted and its columns resized.

    Tab names:
        * one round: the input's key;
        * several rounds, one input: ``"Sample<N>"``;
        * several rounds, several inputs: ``"<key>_Sample<N>"`` so that inputs in
          the same round no longer overwrite each other's tab.

    Args:
        inputs_data_dict: Mapping of key -> dict with at least ``"df"`` (the sheet
            data) and ``"url"`` (link to the source spreadsheet).
        output_name: Unused; kept so existing calls keep working.
        entries_per_sample: Maximum number of distinct index labels per sample.
        num_samples: Number of sampling rounds.
    """
    several_inputs = len(inputs_data_dict) > 1
    for sample in range(num_samples):
        for sheet,values in inputs_data_dict.items():
            print("Batch loading...")
            df = pd.DataFrame(values["df"])
            # Distinct non-blank labels in sheet order (a set would make the order,
            # and hence any seeded sample, depend on hashing).
            firm_indices = [label for label in df.index.unique().tolist() if label != ""]
            if not firm_indices:
                print(f"Sheet {sheet} has no non-blank index entries; skipped.")
                continue
            # Only draw when there is more than requested; a sheet with fewer (or
            # exactly as many) entries is uploaded in full instead of being dropped.
            if len(firm_indices) > entries_per_sample:
                firm_indices = random.sample(firm_indices, entries_per_sample)
            df = df.loc[firm_indices]
            if num_samples == 1:
                tab_name = sheet
            elif several_inputs:
                tab_name = f"{sheet}_Sample{sample+1}"
            else:
                tab_name = "Sample" + str(sample+1)
            output_spreadsheet_name.df_to_sheet(df,
                    index=True,
                    start='A2',  # row 1 is reserved for the stamp
                    sheet=tab_name,
                    freeze_headers=True,
                    )
            stamp_output(tab_name,values["url"])
            format_header(tab_name)
            format_worksheet(tab_name)
            print(f"Tab {tab_name} uploaded successfully.")
    print(f"All sheets loaded successfully. Outputs: https://docs.google.com/spreadsheets/d/{output_gspread.id}")

In [23]:
# Open every input spreadsheet named by the user and keep its data, URL and batch
# marker in a dict keyed by the first seven characters of the spreadsheet name.
clean_input_data = create_input_sheet_dict(input_spreadsheet_names)

['01_FL_02_CholanRaysoft_Pavithra', '02_FL_02_DeviRaysoft_Visalakshi', '03_FL_02_Hemalatha__Revathi.K', '04_FL_02_JayaShree__Kanimozhi.K', '05_FL_02_Kalpanaray__Manjulavathi', '06_FL_02_Kanagavalli__Uma', '07_FL_02_Parimalaray__SathyaPriya R', '08_FL_02_Priyankaray__Logeswari', '09_FL_02_Ramachandranraysoft__Yeasumani', '10_FL_02_UmaMaheswari__Veena', '11_FL_02_VeenaRaysoft__Veeramani', '12_FL_02_Bridget Mary_Swathi', '13_FL_02_Madhivanan Ilangovan_Kanagavalli', '14_FL_02_Priyadharshini_Sona']


In [26]:
# Draw the random samples from the loaded inputs and upload them as tabs of the
# output spreadsheet; progress and the final link are printed below.
create_output(clean_input_data,output_sheet_name,entries_per_sample,num_samples)

Batch loading...
Tab 01_FL_0 uploaded successfully.
Batch loading...
Tab 01_FL_0 uploaded successfully.
Batch loading...
Tab 01_FL_0 uploaded successfully.
Batch loading...
Tab 04_FL_0 uploaded successfully.
Batch loading...
Tab 04_FL_0 uploaded successfully.
Batch loading...
Tab 06_FL_0 uploaded successfully.
Batch loading...
Tab 07_FL_0 uploaded successfully.
Batch loading...
Tab 08_FL_0 uploaded successfully.
Batch loading...
Tab 09_FL_0 uploaded successfully.
Batch loading...
Tab 10_FL_0 uploaded successfully.
Batch loading...
Tab 11_FL_0 uploaded successfully.
Batch loading...
Tab 12_FL_0 uploaded successfully.
Batch loading...
Tab 12_FL_0 uploaded successfully.
Batch loading...
Tab 12_FL_0 uploaded successfully.
All sheets loaded successfully. Outputs: https://docs.google.com/spreadsheets/d/1d6zfhj8U4lakF_SdwEw8orVmVr5Tvhvt3PSfoEH-DeE
