In [None]:
import pandas as pd

In [None]:
# Read a CSV or JSON file into a DataFrame
def read_file(file_path):
    """
    Read a file and return a DataFrame based on the file extension.

    The extension check is case-insensitive, so ``data.CSV`` and
    ``data.Json`` are treated the same as their lowercase forms. Path-like
    objects (e.g. ``pathlib.Path``) are accepted as well as plain strings.

    Args:
        file_path (str or os.PathLike): Path to the file.

    Returns:
        pandas.DataFrame or None: DataFrame containing the file data, or
        ``None`` (after printing an error message) when the extension is
        neither ``.json`` nor ``.csv``.
    """
    # Normalise only for the extension test; the caller's original path is
    # what gets handed to pandas, so the file name on disk is untouched.
    extension_probe = str(file_path).lower()
    if extension_probe.endswith('.json'):
        return pd.read_json(file_path)
    if extension_probe.endswith('.csv'):
        return pd.read_csv(file_path)

    # Unsupported extension: keep the original report-and-return-None contract.
    print("Error: Invalid file extension.")
    return None

#example json_file_path = 'path/to/file.json'
#example json_df = read_file(json_file_path)

In [None]:
#describe the dataframe 

def describe_dataframe(df):
    """
    Summarize a Pandas DataFrame with its built-in descriptive statistics.

    Args:
        df (pandas.DataFrame): DataFrame to summarize.

    Returns:
        pandas.DataFrame: The result of ``df.describe()`` called with its
        default arguments.
    """
    return df.describe()

In [None]:
#find missing data and give some common analytics on those values 

def find_missing_data(df):
    """
    Find missing data in a Pandas DataFrame.

    If the frame has a ``video_id`` column, rows whose ``video_id`` starts
    with a newline character are dropped first, because those rows carry no
    data. Rows with a missing ``video_id`` are kept so that they are counted.

    Args:
        df (pandas.DataFrame): DataFrame to check for missing data.

    Returns:
        pandas.DataFrame: One row per column that has at least one missing
        value, indexed by column name, with a single ``'Missing or NA'``
        column holding the count, sorted from most to fewest missing values.
    """
    # na=False makes missing video_id values evaluate to False instead of
    # NaN; without it the ``~`` inversion fails on an object-dtype mask.
    if 'video_id' in df.columns:
        newline_rows = df['video_id'].str.startswith("\n", na=False)
        df = df[~newline_rows]

    missing_counts = df.isnull().sum()
    missing_data = missing_counts.to_frame(name='Missing or NA')

    # Filter and sort on the same label the frame was created with.
    has_missing = missing_data['Missing or NA'] > 0
    return missing_data[has_missing].sort_values(by='Missing or NA', ascending=False)

In [None]:
# Get each column and show its features (data type)

def get_column_featuers(df):
    """Map every column of a pandas DataFrame to its data type.

    Args:
        df (pandas.DataFrame): DataFrame whose columns are inspected.

    Returns:
        dict: Column names as keys and each column's dtype as the value,
        in the DataFrame's column order.
    """
    return {column: df[column].dtype for column in df.columns}

In [None]:
#gets rid of all rows that start with \n as they do not have any data

def get_rid_of_null(df):
    """Drop rows whose ``video_id`` starts with a newline character.

    Such rows do not have any data. Rows whose ``video_id`` is missing
    (NaN/None) do not start with a newline and are therefore kept.

    Args:
        df (pandas.DataFrame): DataFrame with a ``video_id`` column.

    Returns:
        pandas.DataFrame: DataFrame with the rows that meet this criterion
        removed; the original index labels of the kept rows are preserved.

    Raises:
        KeyError: If ``df`` has no ``video_id`` column.
    """
    # na=False makes missing video_id values evaluate to False instead of
    # NaN; without it the ``~`` inversion fails on an object-dtype mask.
    starts_with_newline = df['video_id'].str.startswith("\n", na=False)
    return df[~starts_with_newline]

In [None]:
if __name__ == "__main__":