From 91da8c7307caa7e2df4b1edd0eaa6a1c865fab27 Mon Sep 17 00:00:00 2001
From: blancoapa <pablb@ntnu.no>
Date: Tue, 4 Jun 2024 11:42:00 +0200
Subject: [PATCH] deprecate unused methods in analysis

---
 lib/analysis.py | 169 ------------------------------------------------
 1 file changed, 169 deletions(-)

diff --git a/lib/analysis.py b/lib/analysis.py
index 9cd119a..0372229 100644
--- a/lib/analysis.py
+++ b/lib/analysis.py
@@ -57,71 +57,6 @@ def analyze_time_series(path_to_datafolder):
                                         index=[len(data)])   
     return data
 
-def do_binning_analysis(list_time_series_df, frac_data_to_discard=0.3):
-    """
-    Does a binning analysis of all Pandas DataFrame objects in `list_time_series_df`.
-    
-    Args:
-        list_time_series_df(`lst`): List of Pandas DataFrame objects
-        frac_data_to_discard(`float`, optional): Fraction of data to be discarted in each Pandas DataFrame. Defaults to 0.3.
-        
-    Returns:
-        mean_series(`obj`): PandasSeries object with the mean, error of the mean, number of effective samples and correlation time.
-
-    Note:
-        - All Pandas DataFrame objects in `list_time_series_df` are merged into a single Pandas DataFrame object for the binning analysis.
-        - All Pandas DataFrame objects in `list_time_series_df` are assumed to have the same observables.
-        - The fraction of data to be discarted as equilibration is discarted in each of the Pandas DataFrame objects.
-    """
-    # Estimate the time interval in which the data was stored
-    gathered_binning_df=merge_time_series_dfs(list_time_series_df=list_time_series_df,
-                         frac_data_to_discard=frac_data_to_discard)
-    # Clean up the dataframe 
-    if "Unnamed: 0" in gathered_binning_df.columns:
-        gathered_binning_df=gathered_binning_df.drop(['Unnamed: 0'], axis=1)
-    # Do the binning analysis
-    mean_series=block_analyze(gathered_binning_df)
-    return mean_series
-
-def merge_time_series_dfs(list_time_series_df,frac_data_to_discard=0,rescale_time=False):
-    """
-    Merges all Pandas DataFrame objects in `list_time_series_df` and rescales the time.
-    
-    Args:
-        list_time_series_df(`lst`): List of Pandas DataFrame objects
-        frac_data_to_discard(`float`, optional): Fraction of data to be discarted in each Pandas DataFrame. Defaults to 0.
-        
-    Returns:
-        gathered_binning_df(`obj`): DataFrame object with all data.
-
-    Note:
-        - All Pandas DataFrame objects in `list_time_series_df` are assumed to have the same observables.
-        - The fraction of data to be discarted as equilibration is discarted in each of the Pandas DataFrame objects.
-    """
-    # Estimate the time interval in which the data was stored
-    dt = get_dt(list_time_series_df[0])
-    for index in range(len(list_time_series_df)):
-        binning_df=list_time_series_df[index]
-        # Drop the data corresponding to equilibration
-        drop_rows = int(binning_df.shape[0]*frac_data_to_discard)
-        binning_df  = binning_df.drop(range(0,drop_rows))
-        if rescale_time:         
-            # Make a continous time evolution
-            binning_df["time"]+=index*binning_df.shape[0]*dt
-        list_time_series_df[index]=binning_df
-
-    # Join all the dataframes for binning analysis
-    gathered_binning_df=pd.concat(list_time_series_df)
-    
-    if 'Unnamed: 0' in gathered_binning_df.columns:
-    # Clean up the dataframe 
-        gathered_binning_df=gathered_binning_df.drop(['Unnamed: 0'], axis=1)
-    
-    # Clean up the dataframe 
-    gathered_binning_df.reset_index(drop=True, inplace=True)
-    return gathered_binning_df
-
-
 def get_params_from_dir_name(name):
     """
     Gets the parameters from name assuming a structure 
@@ -191,43 +126,6 @@ def split_dataframe(df,n_blocks):
                                            block_size=block_size_s2)
     return blocks
 
-def get_time_series_from_average_df(pd_series, label):
-    """
-    Gets the time series from a PandasSeries object
-    
-    Args:
-        pd_series (`obj`): PandasSeries object
-        label (`str`): column name where the time series are stored
-    
-    Returns:
-        (`obj`): PandasDataFrame with the time series and their statistical error
-    """
-    import numpy as np
-    dist_dict={}
-    expected_labels=["mean", "errmean", "nsamples"]
-    for data_str in pd_series[label]:
-        clean_data_str=data_str.strip("{").strip("}")
-        for data_set in clean_data_str.split("]"):
-            data_list=data_set.split(":")
-            if len(data_list) == 1:
-                continue            
-            # Parse data and label
-            data=list(data_list[1][2:].split(","))
-            data=np.array([float(x) for x in data])
-            clean_label=data_list[0].strip("'").strip(",").strip()
-            label_sts=clean_label.split("_")[-1]
-            label_qty=clean_label[:-len(label_sts)-1].strip("'")            
-            if label_sts not in expected_labels:
-                raise ValueError(f"Error while parsing the df, found label for stats {label_sts}")
-
-            if label_qty in dist_dict.keys():
-                dist_dict[label_qty][label_sts]=data
-            else:
-                dist_dict[label_qty]={}
-                dist_dict[label_qty][label_sts]=data 
-    return pd.DataFrame.from_dict(dist_dict)
-
-
 def block_analyze(full_data, n_blocks=16, time_col = "time", equil=0.1,  columns_to_analyze = "all", verbose = False):
     """
     Analyzes the data in `full_data` using a binning procedure.
@@ -331,73 +229,6 @@ def read_csv_file(path):
     else:
         return None
 
-def get_distribution_from_df(df, key):
-    """
-    Gets a list stored in a pandas dataframe `df`.
-
-    Args:
-        - df (`obj`): pandas dataframe
-        - key (`str`): column name where the list is stored
-
-    Returns:
-        distribution_list (`lst`): list stored under `key`
-
-    """
-    distribution_list=[]
-    for row in df[key]:
-        if pd.isnull(row):
-            continue
-        clean_row=row.strip("[").strip("]")
-        row_list=[]
-        for element in clean_row.split():
-            row_list.append(float(element))
-        distribution_list.append(row_list)
-    return distribution_list
-
-def create_histogram_df_from_distribution_list(distribution_list, start, end, nbins):
-    """
-    Does a histogram from the data stored in `distribution_list` and stores it into a pandas dataframe.
-
-    Args:
-        start (`float`): start point of the histogram
-        end (`float`): end point of the histogram
-        nbins (`int`): number of bins in the histogram
-
-    Returns:
-        `obj`: Pandas dataframe with the histogram counts
-    """
-
-    counts, x_bins = np.histogram(distribution_list[0], bins=nbins, range=(start,end))
-    dict_hist={}
-    for index in range(len(counts)):
-        dict_hist[x_bins[index]]=[]
-    dict_hist["time"]=[]
-    cnt=1
-    for data_set in distribution_list:
-        counts, _  = np.histogram(data_set, bins=nbins, range=(start,end))
-        norm=sum(counts)
-        for index in range(len(counts)):
-            dict_hist[x_bins[index]].append(counts[index]/norm)
-        dict_hist["time"].append(cnt)
-        cnt+=5000
-    return  pd.DataFrame.from_dict(dict_hist)
-
-def find_index_with_value_in_df(df,column_name, value, tol=0.01):
-    """
-    Finds the index in the pandas DataFrame `df` with a column `column_name` and a row `value`.
-
-    Args:
-        df (`obj`): Pandas DataFrame.
-        column_name (`str`): Label of the column.
-        value (`any`): Value to be found in `df[column_name]`.
-        tol (`float`): Tolerance in value.
-
-    Returns:
-        index (int): Index found.
-    """
-    index = np.where(abs(df[column_name]-value)/value < tol)
-    return index[0]
-
 def built_output_name(input_dict):
     """
     Builts the output name for a given set of input parameters.