In [None]:
import pandas as pd
import numpy as np

# Assuming your DataFrame is named 'df' and contains 'issue_due_date' and 'remediation_date' columns
# Convert 'issue_due_date' and 'remediation_date' to datetime if not already in datetime format
df['issue_due_date'] = pd.to_datetime(df['issue_due_date'])
df['remediation_date'] = pd.to_datetime(df['remediation_date'])

# Calculate the difference between 'issue_due_date' and 'remediation_date' in days and convert to whole numbers
df['days_difference'] = (df['issue_due_date'] - df['remediation_date']).dt.days.astype('Int64')

# Define conditions and corresponding buckets
conditions = [
    (df['days_difference'] < -365),
    (df['days_difference'] >= -365) & (df['days_difference'] <= -100),
    (df['days_difference'] >= -99) & (df['days_difference'] <= -29),
    (df['days_difference'] >= -28) & (df['days_difference'] <= -22),
    (df['days_difference'] >= -21) & (df['days_difference'] <= -15),
    (df['days_difference'] >= -14) & (df['days_difference'] <= -8),
    (df['days_difference'] >= -7) & (df['days_difference'] <= -1),
    (df['days_difference'] == 0),
    (df['days_difference'] >= 1) & (df['days_difference'] <= 7),
    (df['days_difference'] >= 8) & (df['days_difference'] <= 14),
    (df['days_difference'] >= 15) & (df['days_difference'] <= 21),
    (df['days_difference'] >= 22) & (df['days_difference'] <= 28),
    (df['days_difference'] >= 29) & (df['days_difference'] <= 99),
    (df['days_difference'] >= 100) & (df['days_difference'] <= 365),
    (df['days_difference'] > 365)
]

# Corresponding bucket labels
buckets = [
    "<-365 days before",
    ">365 days before",
    "100-365 days before",
    "29-99 days before",
    "22-28 days before",
    "15-21 days before",
    "8-14 days before",
    "1-7 days before",
    "On the due date",
    "1-7 days after",
    "8-14 days after",
    "15-21 days after",
    "22-28 days after",
    "29-99 days after",
    "100-365 days after",
    ">365 days after"
]

# Create a new column 'bucket' based on the conditions
df['bucket'] = pd.Series(np.select(conditions, buckets, default=''), dtype='str')

# Drop the temporary column 'days_difference' if not needed
df.drop('days_difference', axis=1, inplace=True)


In [None]:
#Remediation
import pandas as pd
import numpy as np

# Assuming your DataFrame is named 'df' and contains 'issue_due_date' and 'remediation_date' columns
# Convert 'issue_due_date' and 'remediation_date' to datetime if not already in datetime format
df['issue_due_date'] = pd.to_datetime(df['issue_due_date'])
df['remediation_date'] = pd.to_datetime(df['remediation_date'])

# Calculate the difference between 'issue_due_date' and 'remediation_date' in days and convert to whole numbers
df['days_difference'] = (df['issue_due_date'] - df['remediation_date']).dt.days.astype('Int64')

# Define conditions and corresponding buckets
conditions = [
    (df['days_difference'] < -365),
    (df['days_difference'] >= -365) & (df['days_difference'] <= -100),
    (df['days_difference'] >= -99) & (df['days_difference'] <= -29),
    (df['days_difference'] >= -28) & (df['days_difference'] <= -22),
    (df['days_difference'] >= -21) & (df['days_difference'] <= -15),
    (df['days_difference'] >= -14) & (df['days_difference'] <= -8),
    (df['days_difference'] >= -7) & (df['days_difference'] <= -1),
    (df['days_difference'] == 0),
    (df['days_difference'] >= 1) & (df['days_difference'] <= 7),
    (df['days_difference'] >= 8) & (df['days_difference'] <= 14),
    (df['days_difference'] >= 15) & (df['days_difference'] <= 21),
    (df['days_difference'] >= 22) & (df['days_difference'] <= 28),
    (df['days_difference'] >= 29) & (df['days_difference'] <= 99),
    (df['days_difference'] >= 100) & (df['days_difference'] <= 365),
    (df['days_difference'] > 365)
]

# Corresponding bucket labels
buckets = [
    "<-365 days before",
    ">365 days before",
    "100-365 days before",
    "29-99 days before",
    "22-28 days before",
    "15-21 days before",
    "8-14 days before",
    "1-7 days before",
    "On the due date",
    "1-7 days after",
    "8-14 days after",
    "15-21 days after",
    "22-28 days after",
    "29-99 days after",
    "100-365 days after",
    ">365 days after"
]

# Create a new column 'bucket' based on the conditions
df['bucket'] = pd.Series(np.select(conditions, buckets, default=''), dtype='str')

# Drop the temporary column 'days_difference' if not needed
df.drop('days_difference', axis=1, inplace=True)

# Save the DataFrame to a new sheet named "Remediation" in the same Excel workbook
with pd.ExcelWriter('your_workbook.xlsx', engine='openpyxl') as writer:
    writer.book = writer.sheets['Remediation']
    df.to_excel(writer, sheet_name='Remediation', index=False)


In [None]:
#At Risk 
import pandas as pd
import numpy as np

# Assuming your DataFrame is named 'df' and contains 'marked_at_risk_date', 'due_date', and 'status' columns
# Convert 'marked_at_risk_date' and 'due_date' to datetime if not already in datetime format
df['marked_at_risk_date'] = pd.to_datetime(df['marked_at_risk_date'])
df['due_date'] = pd.to_datetime(df['due_date'])

# Create a condition to filter rows where status was ever marked as "risk" for each ID
condition_risk = df.groupby('ID')['status'].transform(lambda x: 'risk' in x.values)

# Filter the DataFrame for entries where the ID was ever marked at risk
df_at_risk = df[condition_risk]

# Calculate the difference between 'marked_at_risk_date' and 'due_date' in days and convert to whole numbers
df_at_risk['days_difference_risk'] = (df_at_risk['marked_at_risk_date'] - df_at_risk['due_date']).dt.days.astype('Int64')

# Define conditions and corresponding buckets for "At risk"
conditions_risk = [
    (df_at_risk['days_difference_risk'] < -365),
    (df_at_risk['days_difference_risk'] >= -365) & (df_at_risk['days_difference_risk'] <= -100),
    (df_at_risk['days_difference_risk'] >= -99) & (df_at_risk['days_difference_risk'] <= -29),
    (df_at_risk['days_difference_risk'] >= -28) & (df_at_risk['days_difference_risk'] <= -22),
    (df_at_risk['days_difference_risk'] >= -21) & (df_at_risk['days_difference_risk'] <= -15),
    (df_at_risk['days_difference_risk'] >= -14) & (df_at_risk['days_difference_risk'] <= -8),
    (df_at_risk['days_difference_risk'] >= -7) & (df_at_risk['days_difference_risk'] <= -1),
    (df_at_risk['days_difference_risk'] == 0),
    (df_at_risk['days_difference_risk'] >= 1) & (df_at_risk['days_difference_risk'] <= 7),
    (df_at_risk['days_difference_risk'] >= 8) & (df_at_risk['days_difference_risk'] <= 14),
    (df_at_risk['days_difference_risk'] >= 15) & (df_at_risk['days_difference_risk'] <= 21),
    (df_at_risk['days_difference_risk'] >= 22) & (df_at_risk['days_difference_risk'] <= 28),
    (df_at_risk['days_difference_risk'] >= 29) & (df_at_risk['days_difference_risk'] <= 99),
    (df_at_risk['days_difference_risk'] >= 100) & (df_at_risk['days_difference_risk'] <= 365),
    (df_at_risk['days_difference_risk'] > 365)
]

# Corresponding bucket labels for "At risk"
buckets_risk = [
    "<-365 days before",
    ">365 days before",
    "100-365 days before",
    "29-99 days before",
    "22-28 days before",
    "15-21 days before",
    "8-14 days before",
    "1-7 days before",
    "On the due date",
    "1-7 days after",
    "8-14 days after",
    "15-21 days after",
    "22-28 days after",
    "29-99 days after",
    "100-365 days after",
    ">365 days after"
]

# Create a new column 'bucket_risk' based on the conditions for "At risk"
df_at_risk['bucket_risk'] = pd.Series(np.select(conditions_risk, buckets_risk, default=''), dtype='str')

# Save the DataFrame to a new sheet named "At risk" in the same Excel workbook
with pd.ExcelWriter('your_workbook.xlsx', engine='openpyxl', mode='a') as writer:
    df_at_risk.to_excel(writer, sheet_name='At risk', index=False)


In [None]:
#over vs marked risk
import pandas as pd
import numpy as np

# Assuming your DataFrame is named 'df' and contains 'marked_at_risk_date', 'due_date', and 'status' columns
# Convert 'marked_at_risk_date', 'due_date', and 'remediation_date' to datetime if not already in datetime format
df['marked_at_risk_date'] = pd.to_datetime(df['marked_at_risk_date'])
df['due_date'] = pd.to_datetime(df['due_date'])
df['remediation_date'] = pd.to_datetime(df['remediation_date'])

# Create a condition to filter rows where status was ever marked as "risk" for each ID
condition_risk = df.groupby('ID')['status'].transform(lambda x: 'risk' in x.values)

# Filter the DataFrame for entries where the ID was ever marked at risk and "Due date" < "Remediation date"
df_overdue_at_risk = df[condition_risk & (df['due_date'] < df['remediation_date'])]

# Calculate the difference between 'due_date' and 'marked_at_risk_date' in days and convert to whole numbers
df_overdue_at_risk['days_difference_overdue_at_risk'] = (df_overdue_at_risk['due_date'] - df_overdue_at_risk['marked_at_risk_date']).dt.days.astype('Int64')

# Define conditions and corresponding buckets for "Overdue and at risk"
conditions_overdue_at_risk = [
    (df_overdue_at_risk['days_difference_overdue_at_risk'] < -365),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= -365) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= -100),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= -99) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= -29),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= -28) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= -22),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= -21) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= -15),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= -14) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= -8),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= -7) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= -1),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] == 0),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= 1) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= 7),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= 8) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= 14),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= 15) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= 21),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= 22) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= 28),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= 29) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= 99),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] >= 100) & (df_overdue_at_risk['days_difference_overdue_at_risk'] <= 365),
    (df_overdue_at_risk['days_difference_overdue_at_risk'] > 365)
]

# Corresponding bucket labels for "Overdue and at risk"
buckets_overdue_at_risk = [
    "<-365 days before",
    ">365 days before",
    "100-365 days before",
    "29-99 days before",
    "22-28 days before",
    "15-21 days before",
    "8-14 days before",
    "1-7 days before",
    "On the due date",
    "1-7 days after",
    "8-14 days after",
    "15-21 days after",
    "22-28 days after",
    "29-99 days after",
    "100-365 days after",
    ">365 days after"
]

# Create a new column 'bucket_overdue_at_risk' based on the conditions for "Overdue and at risk"
df_overdue_at_risk['bucket_overdue_at_risk'] = pd.Series(np.select(conditions_overdue_at_risk, buckets_overdue_at_risk, default=''), dtype='str')

# Save the DataFrame to a new sheet named "Overdue and at risk" in the same Excel workbook
with pd.ExcelWriter('your_workbook.xlsx', engine='openpyxl', mode='a') as writer:
    df_overdue_at_risk.to_excel(writer, sheet_name='Overdue and at risk', index=False)


In [None]:
import pandas as pd

# Load data from each sheet in different workbooks
df_data = pd.read_excel('workbook_data.xlsx', sheet_name='Data')
df_remediation = pd.read_excel('workbook_remediation.xlsx', sheet_name='Remediation')
df_at_risk = pd.read_excel('workbook_at_risk.xlsx', sheet_name='At risk')
df_overdue_at_risk = pd.read_excel('workbook_overdue_at_risk.xlsx', sheet_name='Overdue and at risk')

# Concatenate DataFrames along the rows
df_master = pd.concat([df_data, df_remediation, df_at_risk, df_overdue_at_risk], ignore_index=True)

# Save the master DataFrame to a new sheet in the master workbook
with pd.ExcelWriter('master_workbook.xlsx', engine='openpyxl') as writer:
    df_master.to_excel(writer, sheet_name='Master', index=False)


In [None]:
import pandas as pd

# Assuming your DataFrame is named 'df'

# Find the second-top change_date for each group
second_top_dates = (
    df.groupby('ID')['change_date']
    .agg(lambda x: x.nlargest(2).iloc[-1] if len(x) > 1 else x.min())
    .reset_index(name='second_top_date')
)

# Merge the second-top dates back into the original DataFrame
df = pd.merge(df, second_top_dates, on='ID', how='left')

# Create a new column with the minimum change_date for each group
df['min_change_date'] = df.groupby('ID')['change_date'].transform('min')

# Fill NaN values in 'second_top_date' with the respective minimum change_date
df['second_top_date'].fillna(df['min_change_date'], inplace=True)

# Drop the temporary columns if not needed
df.drop(['min_change_date'], axis=1, inplace=True)


In [None]:
df['date'] = pd.to_datetime(df['date'])

# Group the DataFrame by 'ID' and find the second maximum date for each group
second_max_dates = df.groupby('ID')['date'].nlargest(2).reset_index(level=1, drop=True).groupby('ID').min()

# Merge the second maximum dates back to the original DataFrame based on 'ID'
result_df = df.merge(second_max_dates, left_on='ID', right_index=True, suffixes=('', '_second_max'))

# Rename the column to 'second_max_date'
result_df.rename(columns={'date': 'second_max_date'}, inplace=True)

In [None]:
import pandas as pd
from tkinter import filedialog
from IPython import get_ipython

# Ensure Tkinter is properly initialized in IPython
get_ipython().run_line_magic('gui', 'tk')

# Ask the user to select a CSV file
file_path = filedialog.askopenfilename(title="Select a CSV file", filetypes=[("CSV files", "*.csv")])

# Check if a file was selected
if file_path:
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)

    # Display the DataFrame or perform further operations
    print("DataFrame from selected file:")
    print(df)
else:
    print("No file selected.")


In [None]:
import pandas as pd
from tkinter import filedialog
from IPython import get_ipython

# Ensure Tkinter is properly initialized in IPython
get_ipython().run_line_magic('gui', 'tk')

# Ask the user to select a CSV or Excel file
file_path = filedialog.askopenfilename(
    title="Select a file",
    filetypes=[("CSV files", "*.csv"), ("Excel files", "*.xlsx")]
)

# Check if a file was selected
if file_path:
    # Determine the file type and read into a DataFrame accordingly
    if file_path.lower().endswith('.csv'):
        df = pd.read_csv(file_path)
    elif file_path.lower().endswith('.xlsx'):
        df = pd.read_excel(file_path)
    else:
        print("Unsupported file format. Please select a CSV or Excel file.")
        df = None

    # Display the DataFrame or perform further operations
    if df is not None:
        print("DataFrame from selected file:")
        print(df)
else:
    print("No file selected.")


In [None]:
import pandas as pd

# Ask the user to enter the file path
file_path = input("Enter the full path of the CSV or Excel file: ")

# Check if a file path was provided
if file_path:
    # Determine the file type and read into a DataFrame accordingly
    if file_path.lower().endswith('.csv'):
        df = pd.read_csv(file_path)
    elif file_path.lower().endswith('.xlsx'):
        df = pd.read_excel(file_path)
    else:
        print("Unsupported file format. Please provide a path to a CSV or Excel file.")
        df = None

    # Display the DataFrame or perform further operations
    if df is not None:
        print("DataFrame from selected file:")
        print(df)
else:
    print("No file path provided.")


In [None]:
import pandas as pd

# Ask the user to enter the file path
file_path = input("Enter the full path of the CSV or Excel file: ")

# Check if a file path was provided
if file_path:
    # Determine the file type and read into a DataFrame accordingly
    if file_path.lower().endswith('.csv'):
        try:
            # Try reading with UTF-8 first
            df = pd.read_csv(file_path, encoding='utf-8')
        except UnicodeDecodeError:
            # If UTF-8 fails, try a different encoding (e.g., 'latin-1' or 'ISO-8859-1')
            df = pd.read_csv(file_path, encoding='latin-1')
    elif file_path.lower().endswith('.xlsx'):
        df = pd.read_excel(file_path)
    else:
        print("Unsupported file format. Please provide a path to a CSV or Excel file.")
        df = None

    # Display the DataFrame or perform further operations
    if df is not None:
        print("DataFrame from selected file:")
        print(df)
else:
    print("No file path provided.")


In [None]:
import pandas as pd
import io
from IPython.display import display
import ipywidgets as widgets

# Function to handle file upload and processing
def handle_upload(change):
    uploaded_file = upload_button.value
    if uploaded_file:
        # Read the file into a DataFrame
        content = uploaded_file['content']
        file_extension = uploaded_file['metadata']['name'].split('.')[-1]

        if file_extension.lower() == 'csv':
            df = pd.read_csv(io.BytesIO(content))
        elif file_extension.lower() in ['xls', 'xlsx']:
            df = pd.read_excel(io.BytesIO(content))
        else:
            print("Unsupported file format. Please upload a CSV or Excel file.")
            return

        # Display the DataFrame or perform further operations
        print("DataFrame from uploaded file:")
        display(df)

# Create a FileUpload widget
upload_button = widgets.FileUpload(accept='.csv, .xlsx', multiple=False)
upload_button.observe(handle_upload, names='value')

# Display the FileUpload widget
display(upload_button)


In [1]:
import pandas as pd
import io
from IPython.display import display
import ipywidgets as widgets

# Function to handle file upload and processing
def handle_upload(change):
    uploaded_file = upload_button.value
    if uploaded_file:
        # Read the file into a DataFrame
        content = uploaded_file['content']
        file_extension = uploaded_file['metadata']['name'].split('.')[-1]

        if file_extension.lower() == 'csv':
            df = pd.read_csv(io.BytesIO(content))
        elif file_extension.lower() in ['xls', 'xlsx']:
            df = pd.read_excel(io.BytesIO(content))
        else:
            print("Unsupported file format. Please upload a CSV or Excel file.")
            return

        # Display the DataFrame or perform further operations
        print("DataFrame from uploaded file:")
        display(df)

# Create a FileUpload widget
upload_button = widgets.FileUpload(accept='.csv, .xlsx', multiple=False)
upload_button.observe(handle_upload, names='value')

# Display the FileUpload widget
display(upload_button)


FileUpload(value={}, accept='.csv, .xlsx', description='Upload')

KeyError: 'content'

In [None]:
import pandas as pd
from IPython.display import display, FileLink
import ipywidgets as widgets
from io import BytesIO

# Function to handle file upload and processing
def handle_upload(change):
    uploaded_file = file_upload.value
    if uploaded_file:
        # Read the file into a DataFrame
        content = uploaded_file['content']
        file_extension = uploaded_file['metadata']['name'].split('.')[-1]

        if file_extension.lower() == 'csv':
            df = pd.read_csv(BytesIO(content))
        elif file_extension.lower() in ['xls', 'xlsx']:
            df = pd.read_excel(BytesIO(content))
        else:
            print("Unsupported file format. Please upload a CSV or Excel file.")
            return

        # Display the DataFrame or perform further operations
        print("DataFrame from uploaded file:")
        display(df)

# Create a FileUpload widget
file_upload = widgets.FileUpload(accept='.csv, .xlsx', multiple=False)
file_upload.observe(handle_upload, names='value')

# Display the FileUpload widget
display(file_upload)


In [None]:
import pandas as pd
from IPython.display import display
from io import BytesIO

# Function to read and display a DataFrame from a file
def read_and_display_file(file_path):
    file_extension = file_path.split('.')[-1]

    if file_extension.lower() == 'csv':
        df = pd.read_csv(file_path)
    elif file_extension.lower() in ['xls', 'xlsx']:
        df = pd.read_excel(file_path)
    else:
        print("Unsupported file format. Please provide a CSV or Excel file.")
        return

    # Display the DataFrame or perform further operations
    print("DataFrame from selected file:")
    display(df)

# Ask the user to input the file path
file_path = input("Please enter the full path of the CSV or Excel file: ")

# Call the function to read and display the DataFrame
read_and_display_file(file_path)


In [None]:
import os

import tkinter as tk from tkinter import filedialog

#Get the directory of the current script

base_dir = os.path.dirname(os.path.realpath(file))

#Create a Tkinter root window root = tk.Tk()

root.withdraw() # Hide the root window

#Open a file dialog for the user to select a file

file_path = filedialog.askopenfilename(initialdir=os.path.expanduser("~/Desktop"), title="Select a file")

#Check if a file was selected

if file path: print("Selected file:", file_path)

#Specify the directory where you want to save the uploaded file upload_dir = os.path.join(base_dir, "uploaded_files")

#Create the directory if it doesn't exist os.makedirs(upload_dir, exist_ok=True)

#Extract the file name from the selected file path file name os.path.basename(file path)

Specify the path to save the uplsaded file upload_path= os.path.join(upload dir, file_name)

Move the selected file to the upload directory os.rename(file path, upload path)

print("File saved to:", upload path) else: print("No file selected.")