# Programmatically Bringing Data into Excel from Multiple Sources as One Excel Workbook

* The purpose of this notebook is to show how Python can be used to programmatically bring CSV data into Excel.

## Setting Up the Notebook

In [None]:
from pandas import DataFrame, Series
import pandas as pd
import os
import glob
import csv

### Previously Created Functions


In [None]:
def create_dataframe_file_from_csv(csv_file_path, **options):
    """Load a CSV file into a pandas ``DataFrame``.

    Parameters
    ----------
    csv_file_path
        Location of the CSV data; handed to ``pandas.read_csv`` as-is.
    **options
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    The ``DataFrame`` produced by ``pandas.read_csv``.
    """
    return pd.read_csv(csv_file_path, **options)
        
def create_xlsx_file_from_df(dataframe, xlsx_file_path, sheet_name = None):
    """Write a ``DataFrame`` to an xlsx workbook.

    Parameters
    ----------
    dataframe
        Object exposing ``to_excel`` (normally a pandas ``DataFrame``).
    xlsx_file_path
        Path of the workbook to create.
    sheet_name
        Optional worksheet name. When omitted or empty, ``to_excel`` is called
        without a ``sheet_name`` so its own default is used.

    Returns
    -------
    None
    """
    # Only forward sheet_name when one was actually supplied.
    excel_options = {'sheet_name': sheet_name} if sheet_name else {}

    # The context manager saves and closes the workbook on exit, also when
    # to_excel raises. ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(xlsx_file_path) as writer:
        dataframe.to_excel(writer, **excel_options)

def create_xlsx_file_from_csv(csv_file_path, xlsx_file_path, **options):
    """Read a CSV file and write its contents to an xlsx workbook.

    Parameters
    ----------
    csv_file_path
        Path of the CSV file to read.
    xlsx_file_path
        Path of the workbook to create.
    **options
        ``sheet_name`` (optional) names the worksheet. Every other keyword is
        forwarded unchanged to ``create_dataframe_file_from_csv``.

    Returns
    -------
    None
    """
    read_options = dict(options)
    # Always remove sheet_name from the reader options, even when it is None
    # or '', because it is a writer option and the CSV reader rejects it.
    sheet_name = read_options.pop('sheet_name', None)

    df = create_dataframe_file_from_csv(csv_file_path, **read_options)

    # Pass the sheet name through. The previous branches were inverted, so a
    # supplied sheet_name was silently dropped.
    create_xlsx_file_from_df(df, xlsx_file_path, sheet_name = sheet_name)

### Concatenating the Files

- Assumptions:
  1. All CSV files are in the same directory.
  2. All CSV files have the same column headers.
  3. All CSV files contain a `date` column, which is used to sort the combined data.
- The procedure:
  1. Read in all of the CSV files and concatenate them into one `DataFrame`.
  2. Sort the combined `DataFrame` by `date`.
  3. Call `create_xlsx_file_from_df` to create the xlsx file.

In [None]:
def csv_concat(source_files, destination_file):
    """Concatenate every CSV matching a glob pattern into one xlsx workbook.

    Parameters
    ----------
    source_files
        Glob pattern, resolved relative to the current directory, selecting
        the CSV files to combine. Each file must contain a ``date`` column.
    destination_file
        Path of the xlsx workbook to create.

    Raises
    ------
    FileNotFoundError
        If no file matches ``source_files``.
    """
    # glob returns matches in arbitrary, OS-dependent order; sorting the paths
    # makes the row order (and the written index) reproducible across runs.
    csv_paths = sorted(glob.glob(os.path.join('.', source_files)))
    if not csv_paths:
        raise FileNotFoundError(
            'No CSV files match the pattern: {!r}'.format(source_files))

    # Read everything first and concatenate once, instead of growing a frame
    # inside the loop starting from an empty placeholder.
    frames = [pd.read_csv(csv_path, index_col = False) for csv_path in csv_paths]
    final_df = pd.concat(frames, ignore_index = True, axis = 0)
    final_df = final_df.sort_values(by = ['date'], axis = 0)

    create_xlsx_file_from_df(final_df, destination_file)
    

In [None]:
# Combine every CSV matching Source_files/google_20*.csv into one workbook.
csv_concat(r'Source_files/google_20*.csv', 'Destination_files/multiple_sources.xlsx')

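## To Different Sheets

- `multiple_sheets` reads a plain-text listing file in which each line holds a CSV file path and a sheet name separated by whitespace. Each CSV is written to its own sheet of a single workbook, and reading stops at the first line containing `quit`.
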
In [None]:
def multiple_sheets(source_file, destination_file):
    """Write several CSV files to separate sheets of one xlsx workbook.

    Parameters
    ----------
    source_file
        Text file listing the work to do. Each non-blank line holds a CSV
        path and a sheet name separated by whitespace. Processing stops at the
        first line containing ``quit``.
    destination_file
        Path of the xlsx workbook to create (written with the ``xlsxwriter``
        engine).

    Raises
    ------
    ValueError
        If a non-blank line does not split into exactly two fields.
    """
    # Open the listing first so a missing source_file fails before any writer
    # exists. The ExcelWriter context manager saves and closes the workbook on
    # exit; ExcelWriter.save() no longer exists in pandas >= 2.0.
    with open(source_file, 'r') as fh, \
            pd.ExcelWriter(destination_file, engine = 'xlsxwriter') as writer:
        for line in fh:
            if 'quit' in line:
                break
            # Skip blank lines (e.g. a trailing newline) instead of failing
            # on the two-field unpack below.
            if not line.strip():
                continue
            csv_file, sheet_name = line.split()
            df = create_dataframe_file_from_csv(csv_file)
            df.to_excel(writer, sheet_name = sheet_name)

In [None]:
# Write each CSV listed in Source_files/csv_sheet.txt to its own sheet of one workbook.
multiple_sheets('Source_files/csv_sheet.txt', 'Destination_files/pandas_multiple.xlsx')

# End of Notebook