<a href="https://colab.research.google.com/github/sesath-dissanayake/lighthouse-2.0/blob/LIGII-182/lighthouse_transformation_template_2_0_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# We are currently in the process of migrating Lighthouse to Python version 3.11, and at the moment Colab does not support a Python 3.11 runtime.

## Run the following cell to check which Python libraries are installed in the Colab runtime and whether they match our requirements.

[Colab Updated to Python 3.10](https://colab.google/articles/py3.10)

In [None]:
import importlib.util

def check_library_and_version(library_name, expected_version):
    """Checks if a library is installed and its version matches.

    The lookup uses the installed distribution metadata (the names pip
    uses, e.g. "pandas-schema" or "py-cpuinfo") instead of importable module
    names. Distribution names frequently differ from module names, and not
    every module exposes a ``__version__`` attribute, so probing the import
    system would misreport such libraries as missing or mismatched.

    Args:
        library_name (str): Distribution name of the library to check, as it
            would be passed to ``pip install``.
        expected_version (str): Expected version of the library. Compared as
            an exact string against the installed version.

    Returns:
        tuple: (bool, str):
            - True if the library is installed and the version matches, False otherwise.
            - String message about the result (success, mismatch, not found).
    """
    # Function-scope import keeps the function usable on its own, regardless
    # of which imports the surrounding cell has already executed.
    from importlib import metadata

    try:
        actual_version = metadata.version(library_name)
    except metadata.PackageNotFoundError:
        return False, f"❌ Library '{library_name}' not found in this runtime."

    if actual_version == expected_version:
        return True, f"✔️ Library '{library_name}' version {actual_version} matches expected version."

    return False, f"⚠️ Library '{library_name}' version {actual_version} does not match expected version {expected_version}."

# Pinned versions the Lighthouse runtime expects, keyed by library name
libraries_and_versions = {
    "pandas": "1.5.3",
    "pandas-schema": "0.3.4",
    "multidict": "6.0.4",
    "numpy": "1.23.3",
    "pytz": "2022.6",
    "lxml": "4.9.3",
    "importlib-metadata": "5.1.0",
    "zipp": "3.11.0",
    "certifi": "2024.2.2",
    "charset-normalizer": "3.3.2",
    "idna": "3.6",
    "urllib3": "2.2.1",
    "requests": "2.31.0",
    "blosc2": "2.0.0",
    "cython": "0.29.21",
    "msgpack": "1.0.5",
    "numexpr": "2.9.0",
    "packaging": "22.0",
    "py-cpuinfo": "9.0.0",
    "tables": "3.8.0",
}

# Check every pinned library; each entry is (library name, passed?, message)
library_results = [
    (name, *check_library_and_version(name, pinned))
    for name, pinned in libraries_and_versions.items()
]

# Report one block per library: a blank line, the name, then the message
for name, _passed, text in library_results:
    print(f"\n{name}:\n{text}")

# Persist the results on disk so the install cell can load them
import pickle
with open('library_results.pkl', 'wb') as results_file:
    pickle.dump(library_results, results_file)

## Install required libraries

In [None]:
import pickle
import subprocess
import sys

# Load the check results written by the version-check cell.
# NOTE: pickle.load can execute arbitrary code from the file it reads. It is
# used here only because 'library_results.pkl' is produced locally by the
# previous cell; never point this at a file obtained from elsewhere.
with open('library_results.pkl', 'rb') as f:
    library_results = pickle.load(f)

# Install every library whose check failed, at its pinned version.
for library_name, result, message in library_results:
    if result:
        continue

    requirement = f"{library_name}=={libraries_and_versions[library_name]}"
    print(f"\n⏳ Installing library '{library_name}'...")

    # Run pip through the kernel's own interpreter so the package lands in the
    # environment this notebook uses, and so the exit status can be inspected.
    # (A `!pip ...` shell line never raises, so a try/except around it would
    # report success even when the installation failed.)
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", requirement],
        capture_output=True,
        text=True,
    )

    if completed.returncode == 0:
        print(f"✅ Successfully installed '{library_name}'!")
    else:
        print(f"❌ Failed to install '{library_name}': {completed.stderr.strip()}")

## Restart the runtime session to initialize the libraries.

In [None]:
import os

# Force-terminate the current kernel process with signal 9 (SIGKILL on Linux)
# so the runtime session restarts and picks up the newly installed libraries.
kernel_pid = os.getpid()
os.kill(kernel_pid, 9)

# Lighthouse Transformation Scratchbook

<img src="https://i.ibb.co/qxGXnR9/lighthouse-logo-629x500.png" alt="Lighthouse" width="100"/>

## Get Started
Run the following code cells and follow the instructions to get started.

### **Step 1**
Paste the DataFrame share URL generated by Lighthouse below to download and read the DataFrame.

In [None]:
# Share URL of the data file to download; replace the placeholder with the
# URL generated from Lighthouse before running the download cell below.
DATA_DF_URL = "<PASTE DATA SHARE URL>"

In [None]:
## This is to load hdf file correctly.
!pip install tables==3.8.0

In [None]:
import requests
import pandas as pd
import datetime as datetime

# Download the shared HDF file. The timeout prevents the cell from hanging
# indefinitely if the server stops responding.
r = requests.get(DATA_DF_URL, allow_redirects=True, timeout=60)
# Fail fast on HTTP errors (e.g. an expired or invalid share URL); otherwise
# the error page would be saved as data.h5 and surface later as a confusing
# HDF read error.
r.raise_for_status()

data_filename = "data.h5"
with open(data_filename, 'wb') as f:
    f.write(r.content)

# The file holds two HDF keys: 'data' and 'mappings'.
data = pd.read_hdf(data_filename, 'data')
mappings = pd.read_hdf(data_filename, 'mappings')

### **Step 2**

Edit the cell below to add your transformation code.

In [None]:
def transform(data, mappings):
    """Template for the user-supplied transformation.

    As shipped, this returns ``data`` unchanged and does not use ``mappings``.
    Replace the placeholder section below with the transformation logic.

    Args:
        data: Object read from the 'data' key of the downloaded HDF file.
        mappings: Object read from the 'mappings' key of the downloaded HDF file.

    Returns:
        The transformed data (the unmodified ``data`` input until edited).
    """
    ## ....
    ## Transformation code here
    ## ....
    transformed = data
    return transformed

In [None]:
## Test transformation: run transform() on the loaded `data` and `mappings`;
## the returned value is shown as this cell's output.
transform(data, mappings)