# In [9]:
import pandas as pd

# Default input path. transform_data() is defined further down, so it must not
# be called here: running the file top to bottom would raise NameError. The
# call is made in the example-usage section at the end of the file.
input_file = 'input.csv'

def transform_data(input_file, output_file='output.csv'):
  """
  Transform employee data into a historical, row-based versioning format.

  Reads the input CSV, derives 'Employee ID' and 'Pay Rate' from the first
  whitespace-separated token of the 'Employee' and 'Compens' columns,
  forward-fills missing values, sorts the records and gives every record an
  'End Date': the day before the same employee's next effective date, or
  '2100-01-01' for the employee's latest record. The result is written as CSV.

  Args:
    input_file (str): Path to the input CSV file. It must have exactly eleven
      columns, in the order of ``column_names`` below. The 'Employee' and
      'Compens' columns are assumed to hold text (TODO confirm against the
      real export), and effective dates must use the DD.MM.YY format.
    output_file (str): Path of the CSV file to write. Defaults to
      'output.csv'.

  Returns:
    None

  Raises:
    ValueError: If the input does not have eleven columns, or an effective
      date does not match the DD.MM.YY format.
  """
  column_names = [
      'Employee', 'Manager', 'E Last Comp', 'Compens', 'Last Pay R',
      'Variable F', 'Tenure in Yrs', 'Performar', 'Engageme', 'Effective I', 'End Date'
  ]
  # Far-future marker for records that have no successor.
  open_end_date = '2100-01-01'

  # Read the CSV file into a pandas DataFrame
  df = pd.read_csv(input_file)

  # Columns are renamed by position, so fail with a clear message when the
  # file layout is not the expected one.
  if len(df.columns) != len(column_names):
    raise ValueError(
        f'Expected {len(column_names)} columns in {input_file!r}, '
        f'found {len(df.columns)}'
    )
  df.columns = column_names

  # Create 'Employee ID' and 'Pay Rate' columns from existing columns
  df['Employee ID'] = df['Employee'].str.split().str[0]
  df['Pay Rate'] = df['Compens'].str.split().str[0]

  # Drop unnecessary columns
  df = df.drop(columns=['Compens', 'Last Pay R', 'Variable F'])

  # Only the effective date is parsed: the incoming 'End Date' values are
  # replaced below, so they are not validated here.
  df['Effective I'] = pd.to_datetime(df['Effective I'], format='%d.%m.%y')

  # Fill missing values by carrying forward the most recent value in file
  # order. This runs before sorting so that "most recent" still means the
  # preceding row of the input, and before the end dates are derived so that
  # the employee IDs and effective dates they depend on are populated.
  df = df.ffill()

  # Sort the DataFrame by employee ID, date, and other columns
  df = df.sort_values(by=['Employee ID', 'Effective I', 'Manager', 'E Last Comp', 'Pay Rate'])

  # With the rows sorted, the successor of a record is simply the next row,
  # provided that row belongs to the same employee.
  next_start = df['Effective I'].shift(-1)
  same_employee = df['Employee ID'].eq(df['Employee ID'].shift(-1))
  last_day = next_start.where(same_employee) - pd.Timedelta(days=1)

  # Records without a successor have no last day (NaT) and get the marker.
  df['End Date'] = last_day.dt.strftime('%Y-%m-%d').fillna(open_end_date)

  # Select relevant columns for the output
  output_columns = ['Employee ID', 'Manager', 'E Last Comp', 'Pay Rate', 'Tenure in Yrs',
                    'Performar', 'Engageme', 'Effective I', 'End Date']

  # Save the transformed data
  df[output_columns].to_csv(output_file, index=False)

  print(f'Employee data transformed and saved to {output_file}')

# Example usage
# The input must be the source export, not 'output.csv': that is the file
# transform_data() writes, so reading it fails with FileNotFoundError before
# the first successful run and would re-process the result afterwards.
input_file = 'input.csv'  # Replace with the actual input file name

# Run the conversion only when executed directly (a notebook cell also runs
# with __name__ == '__main__'), not when this file is imported.
if __name__ == '__main__':
  transform_data(input_file)


# Output recorded when this cell was run with input_file = 'output.csv':
# FileNotFoundError: [Errno 2] No such file or directory: 'output.csv'