# MyHelper
## Helper functions that can assist with everyday tasks

In [40]:
import pandas as pd

# Sample data
data = {'Category': ['A', 'B', 'A', 'A', 'C', 'B', 'C', 'A', 'B', 'C']}
df = pd.DataFrame(data)

In [42]:
df

Unnamed: 0,Category
0,A
1,B
2,A
3,A
4,C
5,B
6,C
7,A
8,B
9,C


In [43]:
import seaborn as sns
mpg = sns.load_dataset("mpg")

In [44]:
import pandas as pd

def val_counts(df_column):
    """
    Example usage:
Assuming df is your DataFrame and 'column_name' is the name of the column you want to analyze
result = val_counts(df['column_name'])
print(result)
    """
    # Calculate counts
    counts = df_column.value_counts(dropna=False)
    total_count = counts.sum()
    
    # Calculate normalized counts
    normalized_counts = counts / total_count
    
    # Calculate cumulative counts
    cumulative_counts = counts.cumsum()
    
    # Calculate cumulative normalized counts
    cumulative_normalized_counts = cumulative_counts / total_count
    
    # Create DataFrame
    result_df = pd.DataFrame({
        'counts': counts,
        'normalized': normalized_counts,
        'cumulative_counts': cumulative_counts,
        'cumulative_normalized': cumulative_normalized_counts
    })
    
    # Add totals row
    total_row = pd.Series({
        'counts': total_count,
        'normalized': 1.0,
        'cumulative_counts': total_count,
        'cumulative_normalized': 1.0
    }, name='Total')
    
    result_df.loc["Total"] = total_row
    
    return result_df




In [45]:
val_counts(mpg.origin)

Unnamed: 0_level_0,counts,normalized,cumulative_counts,cumulative_normalized
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
usa,249.0,0.625628,249.0,0.625628
japan,79.0,0.198492,328.0,0.824121
europe,70.0,0.175879,398.0,1.0
Total,398.0,1.0,398.0,1.0


In [46]:
val_counts(df.Category)

Unnamed: 0_level_0,counts,normalized,cumulative_counts,cumulative_normalized
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,4.0,0.4,4.0,0.4
B,3.0,0.3,7.0,0.7
C,3.0,0.3,10.0,1.0
Total,10.0,1.0,10.0,1.0


In [65]:
import pandas as pd
from datetime import datetime

def excel(dataframe, excel_filename=None, sheet_name=None, index=False):
    """
    Save dataframe to Excel in the current working directory.
    Parameters: dataframe, excel_filename=None, sheet_name=None, index=False
    """
    dataframe_as_string = [x for x in globals() if globals()[x] is dataframe][0]
    if excel_filename is None:
        tm = datetime.now().strftime('%m%d%Y_%H.%M_Hrs')
        excel_filename = f"{dataframe_as_string}_{tm}.xlsx"
        
    if sheet_name is None:
        sheet_name = dataframe_as_string
    
    try:
        with pd.ExcelWriter(excel_filename, mode='a', engine='openpyxl',if_sheet_exists='replace') as writer:
                # Write the DataFrame to the Excel file
            dataframe.to_excel(writer, sheet_name=sheet_name, index=index)
            print('Updated file', excel_filename, ', sheet:', sheet_name)
    except FileNotFoundError:
        # File does not exist, create a new one
        dataframe.to_excel(excel_filename, sheet_name=sheet_name, index=index)
        print('Created new file', excel_filename, ', sheet:', sheet_name)

# Example usage:
# Assuming df is your DataFrame
# save_to_excel(df, excel_filename='output.xlsx', sheet_name='Sheet1')


In [67]:
excel(mpg,)

Created new file mpg_04172024_13.22_Hrs.xlsx , sheet: mpg


In [68]:
excel(df)

Created new file df_04172024_13.22_Hrs.xlsx , sheet: df
