In [None]:
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np


######################################################################################################
############################################################################################### MAIN #
######################################################################################################

def main():
    """Run the interactive main-menu loop of The Calculeaner.

    Prints the welcome banner, then repeatedly shows the main menu, reads a
    selection and carries it out until the user enters "7".

    The current dataset is tracked explicitly: it is ``None`` until a file has
    been loaded successfully, and a failed load or a cleaning step that
    returns ``None`` never replaces a dataset that is already in memory.
    """
    print("Welcome to The Calculeaner, \nprogrammed by Laura Vodden")
    df = None  # stays None until a file has been loaded successfully
    display_main_menu()
    ans = input(">>>")

    while ans != "7":
        try:
            df = _run_main_selection(ans, df)
        except Exception as err:
            # Top-level boundary of an interactive session: report the
            # problem and keep both the menu and the loaded data alive.
            print("Operation failed: {}: {}".format(type(err).__name__, err))
        display_main_menu()
        ans = input(">>>")
    print("")
    print("Bye!")


def _run_main_selection(ans, df):
    """Carry out one main-menu selection and return the dataset to keep.

    Args:
        ans: The text the user typed at the main menu.
        df: The currently loaded dataset, or ``None`` if nothing is loaded.

    Returns:
        The dataset that should be current after the selection has run.
    """
    if ans == "1":
        print("You have selected option 1 - Load data from a file.\n            ")
        loaded = load_data()
        # load_data() yields None when the file could not be read; keep
        # whatever was loaded before in that case.
        return df if loaded is None else loaded

    # Selections that operate on an already loaded dataset.
    data_actions = {
        "2": ("View data", view_data),
        "3": ("Clean data", clean_data),
        "4": ("Analyse data", analyse_data),
        "5": ("Visualise data", visualise_data),
        "6": ("Save data to a file", save_data_to_file),
    }
    if ans not in data_actions:
        print("")
        print("Invalid selection")
        return df

    label, handler = data_actions[ans]
    print("")
    print("You have selected option {} - {}.\n            ".format(ans, label))
    if ans == "6":
        print("")
    if df is None:
        print("No data have been loaded")
        return df

    result = handler(df)
    # Only the cleaning step produces a new dataset; ignore a None result so
    # the data cannot be lost by leaving the cleaning menu.
    if ans == "3" and result is not None:
        return result
    return df


######################################################################################################
########################################################################################## LOAD DATA #
######################################################################################################

def enter_filename():
    """Prompt for the name of the file to open and return the text entered."""
    print("Enter name of file to open:")
    return input(">>>")

def load_data():
    """Load a CSV file chosen by the user into a DataFrame.

    The file is parsed once. Its column names are listed and the user may
    name one of them to become the index (an empty reply keeps the default
    index). Every 0 in the data is replaced with NaN, i.e. zeros are treated
    as missing values.

    Returns:
        The loaded DataFrame, or ``None`` when the file does not exist or
        cannot be parsed (a message is printed in either case).
    """
    filename = enter_filename()
    try:
        raw = pd.read_csv(filename)
    except FileNotFoundError:
        print("File not found. Please enter a valid filename. ")
        return None
    except ValueError:
        print("Bad format in file")
        return None

    print("")
    print(filename, "has been loaded.")
    print("")
    print("Columns:")
    col_list = raw.columns.tolist()
    for column in col_list:
        print(column)
    print("")

    column_index = set_column_index()
    while column_index not in col_list:
        if column_index == "":
            print("No column index chosen.")
            return raw.replace(0, np.nan)
        print("Chosen column is not in dataframe")
        column_index = set_column_index()

    # Promote the column before replacing zeros so the index labels
    # themselves are left untouched, as when reading with index_col.
    df = raw.set_index(column_index).replace(0, np.nan)
    print("")
    print(column_index, "set as column index")
    return df

def set_column_index():
    """Ask which column, if any, should become the index; return the reply."""
    print("Would you like to set any of the columns as an index? \nEnter a name or press enter.")
    return input(">>>")


######################################################################################################
########################################################################################## VIEW DATA #
######################################################################################################

def view_data(df):
    """Print a blank line followed by the printed form of ``df``."""
    print("", df, sep="\n")


######################################################################################################
######################################################################################### MENU INPUT #
######################################################################################################

def display_main_menu():
    """Print three blank lines, the prompt and the numbered main-menu options."""
    options = """
    1 - Load data from a file
    2 - View data
    3 - Clean data
    4 - Analyse data
    5 - Visualise data
    6 - Save data to file
    7 - Quit
    """
    for _ in range(3):
        print("")
    print("Please choose from the following options:")
    print(options)



######################################################################################################
######################################################################################### CLEAN DATA #
######################################################################################################

def clean_data(df):
    """Run the cleaning sub-menu and return the cleaned dataset.

    The menu is shown repeatedly so several cleaning steps can be applied in
    one visit; entering "6" ends the loop. The (possibly modified) dataset is
    always returned, including when the user finishes without changing
    anything, so the caller never receives ``None`` in place of its data.

    Args:
        df: The DataFrame to clean.

    Returns:
        The DataFrame after all selected cleaning steps.
    """
    operations = {
        "1": drop_rows,
        "2": fill_missing_values,
        "3": drop_duplicates,
        "4": drop_column,
        "5": rename_column,
    }
    display_clean_data_menu()
    ans = input(">>>")
    while ans != "6":
        operation = operations.get(ans)
        if operation is None:
            print("Invalid selection")
        else:
            df = operation(df)
        display_clean_data_menu()
        ans = input(">>>")
    print("Finish cleaning")
    return df

def display_clean_data_menu():
    """Print the cleaning sub-menu, framed by one blank line above and below."""
    menu = """Please select from the following options:
    1 - Drop rows with missing values
    2 - Fill missing values
    3 - Drop duplicate rows
    4 - Drop column
    5 - Rename column
    6 - Finish cleaning"""
    print("", menu, "", sep="\n")


def drop_rows(df):
    """Drop rows that have at least a user-chosen number of missing values.

    The user's threshold is converted to ``dropna``'s ``thresh`` argument
    (the minimum number of non-missing values a row needs to be kept), so a
    row is removed when it has ``threshold`` or more missing values.

    Args:
        df: The DataFrame to filter.

    Returns:
        A new DataFrame without the dropped rows, or ``df`` unchanged when
        no usable threshold was obtained.
    """
    print("")
    print("You have selected option 1 - Drop rows with missing values.")
    threshold = get_threshold_value()
    if threshold is None:
        # No valid number was entered; leave the data as it is.
        print("No rows were dropped.")
        print("")
        return df
    min_non_missing = len(df.columns) - threshold + 1
    # Only `thresh` is passed: pandas does not allow `how` and `thresh`
    # to be set together.
    df = df.dropna(axis=0, thresh=min_non_missing)
    print("")
    return df

def get_threshold_value():
    """Prompt until the user enters a non-negative whole number.

    Every reply is converted to ``int`` before it is range-checked, so
    non-numeric and negative entries both lead to a new prompt.

    Returns:
        The threshold as an ``int`` greater than or equal to 0.
    """
    print("Enter threshold value for missing values: ")
    reply = input(">>>")
    while True:
        try:
            threshold = int(reply)
        except ValueError:
            threshold = None
        if threshold is not None and threshold >= 0:
            return threshold
        reply = input("Invalid selection. Please make a selection: ")

def fill_missing_values(df):
    """Replace every missing value in ``df`` with a value chosen by the user.

    Prints a confirmation and the filled DataFrame, then returns it.
    """
    print("")
    print("You have selected option 2 - Fill missing values.")
    replacement = get_fill_value()
    filled = df.fillna(replacement)
    print("")
    print("All missing values replaced with", replacement)
    print(filled)
    return filled

def get_fill_value():
    """Prompt until the user enters a whole number and return it.

    Returns:
        The replacement value as an ``int``.
    """
    print("What value do you want to replace missing values with?")
    while True:
        try:
            return int(input(">>>"))
        except ValueError:
            # Keep asking instead of failing on the second bad entry.
            print("Invalid input. Please enter a valid integer.")

def drop_duplicates(df):
    """Return ``df`` without duplicate rows, announcing the step on stdout."""
    print("")
    print("You have selected option 3 - Drop duplicate rows.")
    deduplicated = df.drop_duplicates()
    print("Any duplicate rows have been dropped.")
    return deduplicated

def drop_column(df):
    """Remove one user-chosen column from ``df``.

    Args:
        df: The DataFrame to modify.

    Returns:
        A new DataFrame without the chosen column, or ``df`` unchanged when
        the chosen name is not one of its columns.
    """
    print("")
    print("You have selected option 4 - Drop column.")
    column_name = get_column_to_delete(df)
    if column_name not in df.columns:
        print("")
        print(column_name, "is not a column in the dataset; nothing was removed.")
        return df
    # The keyword form is used because DataFrame.drop no longer accepts the
    # axis as a second positional argument.
    df = df.drop(columns=[column_name])
    print("")
    print(column_name, "has been removed from the dataset.")
    return df

def get_column_to_delete(df):
    """Ask which column to delete until an existing column is named.

    The reply is matched against the individual column labels (not against
    the comma-joined listing), so partial names and empty input are rejected.

    Args:
        df: The DataFrame whose columns are offered.

    Returns:
        The label of the chosen column, exactly as it appears in
        ``df.columns``.
    """
    print("Which column would you like to delete?")
    labels = df.columns.tolist()
    # Typed input is always text, so look labels up by their printed form.
    label_by_text = {str(label): label for label in labels}
    print(', '.join(str(label) for label in labels))
    column_name = input(">>>")
    while column_name not in label_by_text:
        print("Column name does not exist. \n Please enter a valid column name. ")
        column_name = input(">>>")
    return label_by_text[column_name]

def rename_column(df):
    """Rename one column of ``df`` using names supplied by the user.

    The column to rename must exist; otherwise a message is printed and the
    data are returned unchanged. The new name is requested again for as long
    as it clashes with an existing column name.

    Args:
        df: The DataFrame to modify.

    Returns:
        A new DataFrame with the column renamed, or ``df`` unchanged when the
        column to rename does not exist.
    """
    print("")
    print("You have selected option 5 - Rename column.")
    col_list = df.columns.tolist()
    print(', '.join(str(name) for name in col_list))

    old_name = get_old_name(df)
    print(old_name)
    if old_name not in col_list:
        print("Column name does not exist.")
        return df

    new_name = get_new_name()
    print(new_name)
    while new_name in col_list:
        print("Name already exists in dataframe. Please choose a new name.")
        new_name = get_new_name()

    df = df.rename(columns={old_name: new_name})
    print(df)
    return df

def get_old_name(df):
    """Ask which column should be renamed and return the text entered.

    ``df`` is accepted but not consulted; the reply is returned unvalidated.
    """
    print("")
    print("Which column would you like to rename?")
    return input(">>>")

def get_new_name():
    """Prompt for a new column name and return the text entered."""
    print("Please enter a new name: ")
    return input(">>>")


######################################################################################################
####################################################################################### ANALYSE DATA #
######################################################################################################

def analyse_data(df):
    """Print summary statistics per column, then the Pearson correlations.

    For every column the name is printed with an underline of matching
    length, followed by the number of non-missing values. Numeric columns
    additionally get minimum, maximum, mean, median, standard deviation and
    standard error of the mean; non-numeric columns are reported as skipped
    because those statistics cannot be formatted for them.

    Args:
        df: The DataFrame to summarise.
    """
    for column in df:
        print("")
        print("")
        print(column)
        print("-" * len(str(column)))

        values = df[column]
        # Count the non-missing values directly from the data.
        print("number_of_values (n): ", values.count())
        if not pd.api.types.is_numeric_dtype(values):
            print("Non-numeric column: summary statistics skipped.")
            continue

        calculate_minimum(df, column)
        calculate_maximum(df, column)
        calculate_mean(df, column)
        calculate_median(df, column)
        calculate_standard_deviation(df, column)
        calculate_standard_error_mean(df, column)

    print("")
    print("")
    print("")
    pearson_title = "Pearson table of correlations: "
    print(pearson_title)
    print('-' * len(pearson_title))
    compute_pearson(df)

def calculate_number_values(column):
    """Print ``len(column) - 1`` labelled as the number of values (n).

    NOTE(review): when ``column`` is a column-name string this is the name's
    character count minus one, not a count of data values - confirm what
    callers are expected to pass.
    """
    print("number_of_values (n): ", len(column) - 1)

def calculate_minimum(df, column):
    """Print the smallest value of ``df[column]`` to two decimal places.

    Missing values are ignored (pandas' default). The label is left-padded
    so that it lines up with the width of "number of values (n)".

    Args:
        df: The DataFrame holding the data.
        column: The label of the column to summarise.
    """
    label = "minimum:"
    padding = ' ' * (len("number of values (n)") - len(label))
    # Called without arguments: the `level` keyword and `skipna=None` are
    # not accepted by current pandas, and the defaults already skip NaN.
    minimum = df[column].min()
    print(padding, label + " ", "{:01.2f}".format(minimum))

def calculate_maximum(df, column):
    """Print the largest value of ``df[column]`` to two decimal places.

    Missing values are ignored (pandas' default). The label is left-padded
    so that it lines up with the width of "number of values (n)".

    Args:
        df: The DataFrame holding the data.
        column: The label of the column to summarise.
    """
    label = "maximum:"
    padding = ' ' * (len("number of values (n)") - len(label))
    # Called without arguments: the `level` keyword and `skipna=None` are
    # not accepted by current pandas, and the defaults already skip NaN.
    maximum = df[column].max()
    print(padding, label + " ", "{:01.2f}".format(maximum))

def calculate_mean(df, column):
    """Print the arithmetic mean of ``df[column]`` to two decimal places.

    Missing values are ignored (pandas' default). The label is left-padded
    so that it lines up with the width of "number of values (n)".

    Args:
        df: The DataFrame holding the data.
        column: The label of the column to summarise.
    """
    label = "mean:"
    padding = ' ' * (len("number of values (n)") - len(label))
    # Called without arguments: the `level` keyword and `skipna=None` are
    # not accepted by current pandas, and the defaults already skip NaN.
    mean = df[column].mean()
    print(padding, label + " ", "{:01.2f}".format(mean))

def calculate_median(df, column):
    """Print the median of ``df[column]`` to two decimal places.

    Missing values are ignored (pandas' default). The label is left-padded
    so that it lines up with the width of "number of values (n)".

    Args:
        df: The DataFrame holding the data.
        column: The label of the column to summarise.
    """
    label = "median:"
    padding = ' ' * (len("number of values (n)") - len(label))
    # Called without arguments: the `level` keyword and `skipna=None` are
    # not accepted by current pandas, and the defaults already skip NaN.
    median = df[column].median()
    print(padding, label + " ", "{:01.2f}".format(median))

def calculate_standard_deviation(df, column):
    """Print the standard deviation of ``df[column]`` to two decimal places.

    Uses pandas' defaults: missing values are ignored and the sample
    standard deviation (``ddof=1``) is computed. The label is left-padded so
    that it lines up with the width of "number of values (n)".

    Args:
        df: The DataFrame holding the data.
        column: The label of the column to summarise.
    """
    label = "standard deviation:"
    padding = ' ' * (len("number of values (n)") - len(label))
    # Called without arguments: the `level` keyword and `skipna=None` are
    # not accepted by current pandas, and the defaults already skip NaN.
    standard_deviation = df[column].std()
    print(padding, label + " ", "{:01.2f}".format(standard_deviation))

def calculate_standard_error_mean(df, column):
    """Print the standard error of the mean of ``df[column]`` to two decimals.

    Uses pandas' defaults: missing values are ignored and ``ddof=1``. The
    label is left-padded so that it lines up with the width of
    "number of values (n)".

    Args:
        df: The DataFrame holding the data.
        column: The label of the column to summarise.
    """
    label = "std. err. of mean:"
    padding = ' ' * (len("number of values (n)") - len(label))
    # Called without arguments: the `level` keyword and `skipna=None` are
    # not accepted by current pandas, and the defaults already skip NaN.
    standard_error_mean = df[column].sem()
    print(padding, label + " ", "{:01.2f}".format(standard_error_mean))

def compute_pearson(df):
    """Print the Pearson correlation matrix of the numeric columns of ``df``.

    Non-numeric columns are left out explicitly before correlating, because
    ``DataFrame.corr`` raises on them when they are not excluded.

    Args:
        df: The DataFrame whose columns are correlated.
    """
    numeric = df.select_dtypes(include=["number", "bool"])
    pearson_corr = numeric.corr(method='pearson', min_periods=1)
    print(pearson_corr)


######################################################################################################
##################################################################################### VISUALISE DATA #
######################################################################################################


def visualise_data(df):
    """Plot ``df`` as a bar graph, line graph or boxplot chosen by the user.

    The plot type and the subplot choice are each requested again until a
    listed option is entered, so a plot is always drawn with a defined kind.
    Title and axis labels are passed to the plot exactly as typed.

    Args:
        df: The DataFrame to plot.
    """
    plot_kinds = {"1": 'bar', "2": 'line', "3": 'box'}

    print("""What kind of plot would you like?
         1 - Bar graph
         2 - Line graph
         3 - Boxplot
         """)
    ans = input(">>>")
    while ans not in plot_kinds:
        print("Invalid selection")
        ans = input(">>>")
    plot_kind = plot_kinds[ans]

    print("""Do you want to use subplots?
         1 - Yes
         2 - No""")
    subplots = input(">>>")
    while subplots not in ("1", "2"):
        print("Invalid selection")
        subplots = input(">>>")

    print("")
    print("Please add a title or press enter to skip.")
    plot_title = input(">>>")

    print("")
    print("Please add an x axis label or press enter to skip.")
    x_axis_label = input(">>>")

    print("")
    print("Please add a y axis label or press enter to skip.")
    y_axis_label = input(">>>")

    df.plot(
        x=None,
        y=None,
        kind=plot_kind,
        subplots=(subplots == "1"),
        title=plot_title,
        xlabel=x_axis_label,
        ylabel=y_axis_label,
    )
    plt.show()

######################################################################################################
################################################################################## SAVE DATA TO FILE #
######################################################################################################

def save_data_to_file(df):
    """Write ``df`` to a CSV file whose name is chosen by the user.

    A named index (for example a column promoted to index when the data were
    loaded) is written as the first column so that its values are not lost;
    an unnamed default index is left out of the file.

    Args:
        df: The DataFrame to save.
    """
    new_filename = assign_filename()
    keep_index = any(name is not None for name in df.index.names)
    df.to_csv(new_filename, index=keep_index, header=True)

def assign_filename():
    """Prompt for a non-blank output file name and return it.

    Blank replies are rejected and asked for again. The "has been saved"
    confirmation is printed here; this function itself writes no file.

    Returns:
        The name entered, or ``None`` if a ``ValueError`` was raised while
        prompting (in which case "Invalid input. " is printed).
    """
    try:
        print("")
        print("Please enter a file name, complete with .csv: ")
        print("")
        chosen = input(">>>")
        while not chosen:
            chosen = input("Name cannot be blank. Please enter a new name: ")
        print("")
        print(chosen, "has been saved to this directory.")
    except ValueError:
        print("Invalid input. ")
        return None
    return chosen




# Start the interactive session only when this code is executed directly
# (script or notebook cell), not when it is imported by other code.
if __name__ == "__main__":
    main()



Welcome to The Calculeaner, 
programmed by Laura Vodden



Please choose from the following options:

    1 - Load data from a file
    2 - View data
    3 - Clean data
    4 - Analyse data
    5 - Visualise data
    6 - Save data to file
    7 - Quit
    
