In [4]:
!pip install jsonpath_ng



In [5]:
import os
import json
import numpy as np
import pandas as pd
import sqlite3
import functools as ft
import matplotlib.pyplot as plt
from abc import ABC, abstractmethod
from jsonpath_ng import parse
from enum import Enum
%matplotlib inline

# Source to Target Mapping - OOP Approach for Single Table

### Data

In [36]:
with open("imdb.json", "r") as file:
    data = json.loads(file.read())
    # data = json.loads(json.load(file))
    for item in data:
        item["Title"] = str(item["Title"])
        item["Director"] = str(item["Director"])
        item["Rotten_Tomatoes_Rating"] = str(item["Rotten_Tomatoes_Rating"])
        item["MPAA_Rating"] = str(item["MPAA_Rating"])
        item["Distributor"] = str(item["Distributor"])
        item["Source"] = str(item["Source"])
        item["Major_Genre"] = str(item["Major_Genre"])
        item["Creative_Type"] = str(item["Creative_Type"])
        item["US_Gross"] = str(item["US_Gross"])
        item["Worldwide_Gross"] = str(item["Worldwide_Gross"])
        item["US_DVD_Sales"] = str(item["US_DVD_Sales"])
        item["Production_Budget"] = str(item["Production_Budget"])
        item["Release_Date"] = str(item["Release_Date"])
        item["Running_Time_(min)"] = str(item["Running_Time_(min)"])
        item["IMDB_Rating"] = str(item["IMDB_Rating"])
        item["IMDB_Votes"] = str(item["IMDB_Votes"])
    print(data[:10])

[{'Title': 'The Land Girls', 'US_Gross': '146083', 'Worldwide_Gross': '146083', 'US_DVD_Sales': '', 'Production_Budget': '8000000', 'Release_Date': '35958', 'MPAA_Rating': 'R', 'Running_Time_(min)': '', 'Distributor': 'Gramercy', 'Source': '', 'Major_Genre': '', 'Creative_Type': '', 'Director': '', 'Rotten_Tomatoes_Rating': '', 'IMDB_Rating': '6.1', 'IMDB_Votes': '1071'}, {'Title': 'First Love, Last Rites', 'US_Gross': '10876', 'Worldwide_Gross': '10876', 'US_DVD_Sales': '', 'Production_Budget': '300000', 'Release_Date': '36014', 'MPAA_Rating': 'R', 'Running_Time_(min)': '', 'Distributor': 'Strand', 'Source': '', 'Major_Genre': 'Drama', 'Creative_Type': '', 'Director': '', 'Rotten_Tomatoes_Rating': '', 'IMDB_Rating': '6.9', 'IMDB_Votes': '207'}, {'Title': 'I Married a Strange Person', 'US_Gross': '203134', 'Worldwide_Gross': '203134', 'US_DVD_Sales': '', 'Production_Budget': '250000', 'Release_Date': '36035', 'MPAA_Rating': '', 'Running_Time_(min)': '', 'Distributor': 'Lionsgate', 'Sou

###  Abstract Base Calss (ABC) for mutual methods

In [37]:
class Interface(ABC):

    @abstractmethod
    def get_data_by_field(self, field_name):
        """Fetch the data by given feild name """

    @abstractmethod
    def get_data_by_id(self, id):
        """Fetch the data by given ID  """

    @abstractmethod
    def get(self):
        """Fetch all data """

### Transform Operations
inherithed from Enum - class that automatic enumrate the variables

In [38]:
class TransformMask(Enum):
    # add here any masks you want
    CLEAN_STRING = ".strip().lower()"
    CAPITAL_LETTER = ".strip().lower().title()"


### Database Class - Define Common Properties for Source, Target, Mapping

In [103]:
class Database:
    def __init__(self):
        pass

    # built-in function that creates and returns a property object
    # get data by: get_data_source_target_mapping.get(dict_key)
    @property
    def get_data_source_target_mapping(self):
        return {
            "source": [
                {
                    "id": 1,
                    "source_field_name": "Title",
                    "source_field_mapping": "$.Title",
                    "source_field_type": "str", # use python types
                    "is_required": True,

                },
                {
                    "id": 2,
                    "source_field_name": "US_Gross",
                    "source_field_mapping": "$.US_Gross",
                    "source_field_type": "str",
                    "is_required": True,
                },
                {
                    "id": 3,
                    "source_field_name": "Worldwide_Gross",
                    "source_field_mapping": "$.Worldwide_Gross",
                    "source_field_type": "str",
                    "is_required": True,

                },                {
                    "id": 4,
                    "source_field_name": "US_DVD_Sales",
                    "source_field_mapping": "$.US_DVD_Sales",
                    "source_field_type": "str",
                    "is_required": True,

                },
                 {
                    "id": 5,
                    "source_field_name": "Production_Budget",
                    "source_field_mapping": "$.Production_Budget",
                    "source_field_type": "str",
                    "is_required": True,

                },                {
                    "id": 6,
                    "source_field_name": "Rotten_Tomatoes_Rating",
                    "source_field_mapping": "$.Rotten_Tomatoes_Rating",
                    "source_field_type": "str",
                    "is_required": True,

                },                {
                    "id": 7,
                    "source_field_name": "IMDB_Rating",
                    "source_field_mapping": "$.IMDB_Rating",
                    "source_field_type": "str",
                    "is_required": True,

                },                {
                    "id": 8,
                    "source_field_name": "IMDB_Votes",
                    "source_field_mapping": "$.IMDB_Votes",
                    "source_field_type": "str",
                    "is_required": True,

                },
                {
                    "id": 9,
                    "source_field_name": "Director",
                    "source_field_mapping": "$.Director",
                    "source_field_type": "str",
                    "is_required": True,

                },
                {
                    "id": 10,
                    "source_field_name": "Distributor",
                    "source_field_mapping": "$.Distributor",
                    "source_field_type": "str",
                    "is_required": True,

                },                {
                    "id": 11,
                    "source_field_name": "Release_Date",
                    "source_field_mapping": "$.Release_Date",
                    "source_field_type": "str",
                    "is_required": True,

                },    {
                    "id": 12,
                    "source_field_name": "Major_Genre",
                    "source_field_mapping": "$.Major_Genre",
                    "source_field_type": "str",
                    "is_required": True,

                },
               {
                    "id": 13,
                    "source_field_name": "Creative_Type",
                    "source_field_mapping": "$.Creative_Type",
                    "source_field_type": "str",
                    "is_required": True,

                },    {
                    "id": 14,
                    "source_field_name": "Source",
                    "source_field_mapping": "$.Source",
                    "source_field_type": "str",
                    "is_required": True,

                }
            ],
            "destination": [
                {
                    "id": 1,
                    "destination_field_name": "Title",
                    "destination_field_mapping": "Title",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "fact"
                },
                {
                    "id": 2,
                    "destination_field_name": "US_Gross",
                    "destination_field_mapping": "US_Gross",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "fact"
                },
                 {
                    "id": 3,
                    "destination_field_name": "Worldwide_Gross",
                    "destination_field_mapping": "Worldwide_Gross",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "fact"
                },                {
                    "id": 4,
                    "destination_field_name": "US_DVD_Sales",
                    "destination_field_mapping": "US_DVD_Sales",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "fact"
                },
                 {
                    "id": 5,
                    "destination_field_name": "Production_Budget",
                    "destination_field_mapping": "Production_Budget",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "fact"
                },                {
                    "id": 6,
                    "destination_field_name": "Rotten_Tomatoes_Rating",
                    "destination_field_mapping": "Rotten_Tomatoes_Rating",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "fact"
                },                {
                    "id": 7,
                    "destination_field_name": "IMDB_Rating",
                    "destination_field_mapping": "IMDB_Rating",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "fact"
                },                {
                    "id": 8,
                    "destination_field_name": "IMDB_Votes",
                    "destination_field_mapping": "IMDB_Votes",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "fact"
                },
                {
                    "id": 9,
                    "destination_field_name": "Director",
                    "destination_field_mapping": "Director",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "dimDirector"
                },                {
                    "id": 10,
                    "destination_field_name": "Distributor",
                    "destination_field_mapping": "Distributor",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "dimDist"
                    },
                {
                    "id": 11,
                    "destination_field_name": "Release_Date",
                    "destination_field_mapping": "Release_Date",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "dimRelease"
                },                {
                    "id": 12,
                    "destination_field_name": "Major_Genre",
                    "destination_field_mapping": "Major_Genre",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "dimGenre"
                },                {
                    "id": 13,
                    "destination_field_name": "Creative_Type",
                    "destination_field_mapping": "Creative_Type",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "dimType"
                },                {
                    "id": 14,
                    "destination_field_name": "Source",
                    "destination_field_mapping": "Source",
                    "destination_field_type": "str",
                    "default_value": "n/a",
                    "destination_table": "dimSource"
                }

            ],
            "transform": [ # using the Enums
                {
                    "id": 1,
                    "transform_mask": 'CAPITAL_LETTER'
                },
                {
                    "id": 2,
                    "transform_mask": 'CLEAN_STRING'
                }
            ],
            "mapping": [
                {
                    "id": 1,
                    "mapping_source": 1,
                    "mapping_destination": 1,
                    "mapping_transform": 1,
                    "destination_table": "fact"
                },
                {
                    "id": 2,
                    "mapping_source": 2,
                    "mapping_destination": 2,
                    "destination_table": "fact"
                },
                {
                    "id": 3,
                    "mapping_source": 3,
                    "mapping_destination": 3,
                    "destination_table": "fact"
                },                {
                    "id": 4,
                    "mapping_source": 4,
                    "mapping_destination": 4,
                    "destination_table": "fact"
                },
                 {
                    "id": 5,
                    "mapping_source": 5,
                    "mapping_destination": 5,
                    "destination_table": "fact"
                },                {
                    "id": 6,
                    "mapping_source": 6,
                    "mapping_destination": 6,
                    "destination_table": "fact"
                },                {
                    "id": 7,
                    "mapping_source": 7,
                    "mapping_destination": 7,
                    "destination_table": "fact"
                },                {
                    "id": 8,
                    "mapping_source": 8,
                    "mapping_destination": 8,
                    "destination_table": "fact"
                },
                {
                    "id": 9,
                    "mapping_source": 9,
                    "mapping_destination": 9,
                    "mapping_transform": 1,
                    "destination_table": "dimDirector"

                },                {
                    "id": 10,
                    "mapping_source": 10,
                    "mapping_destination": 10,
                    "mapping_transform": 1,
                    "destination_table": "dimDist"

                },
                 {
                    "id": 11,
                    "mapping_source": 11,
                    "mapping_destination": 11,
                    "mapping_transform": 1,
                    "destination_table": "dimRelease"

                },
                 {
                    "id": 12,
                    "mapping_source": 12,
                    "mapping_destination": 12,
                    "mapping_transform": 1,
                    "destination_table": "dimGenre"

                },                 {
                    "id": 13,
                    "mapping_source": 13,
                    "mapping_destination": 13,
                    "mapping_transform": 1,
                    "destination_table": "dimType"

                },                 {
                    "id": 14,
                    "mapping_source": 14,
                    "mapping_destination": 14,
                    "mapping_transform": 1,
                    "destination_table": "dimSource"

                }
            ]
        }

In [104]:
# keysList = ['Title', 'US_Gross', 'Worldwide_Gross', 'US_DVD_Sales', 'Production_Budget', 'Rotten_Tomatoes_Rating', 'IMDB_Rating', 'IMDB_Votes']
# Dim1 = ['Director']
# class Database:
#     def __init__(self):
#         self.db = {}

#     def add_source(self):
#         self.db["source"] = []
#         for i in range(1, len(keysList) + 1):
#             self.db["source"].append({
#                 "id": i,
#                 "source_field_name": keysList[i - 1],
#                 "source_field_mapping": keysList[i - 1],
#                 "source_field_type": "str",
#                 "is_required": True,
#             })
#         for i in range(1, len(Dim1) + 1):
#             self.db["source"].append({
#                 "id": i,
#                 "source_field_name": Dim1[i - 1],
#                 "source_field_mapping": Dim1[i - 1],
#                 "source_field_type": "str",
#                 "is_required": True,
#             })

#     def add_destination(self):
#         self.db["destination"] = []
#         for i in range(1, len(keysList) + 1):
#             self.db["destination"].append({
#                 "id": i,
#                 "destination_field_name": keysList[i - 1],
#                 "destination_field_mapping": keysList[i - 1],
#                 "destination_field_type": "str",
#                 "default_value": "n/a",
#                 "destination_table": "fact"
#             })
#         for i in range(1, len(Dim1) + 1):
#             self.db["destination"].append({
#                 "id": i,
#                 "destination_field_name": Dim1[i - 1],
#                 "destination_field_mapping": Dim1[i - 1],
#                 "destination_field_type": "str",
#                 "default_value": "n/a",
#                 "destination_table": "DimDir"
#             })

#     def add_transform(self):
#         self.db["transform"] = [
#             {
#                 "id": 1,
#                 "transform_mask": 'CAPITAL_LETTER',
#                 "destination_table": "fact"
#             },
#             {
#                 "id": 2,
#                 "transform_mask": 'CLEAN_STRING',
#                 "destination_table": "fact"
#             }
#         ]

#     def add_mapping(self):
#         self.db["mapping"] = []
#         for i in range(1, len(keysList) + 1):
#             self.db["mapping"].append(
#                 {
#                     "id": i,
#                     "mapping_source": i,
#                     "mapping_destination": i,
#                     "mapping_transform": 1,
#                     "destination_table": "fact"

#                 }
#             )
#         for i in range(1, len(Dim1) + 1):
#             self.db["mapping"].append(
#                 {
#                     "id": i,
#                     "mapping_source": i,
#                     "mapping_destination": i,
#                     "mapping_transform": 1,
#                     "destination_table": "DimDir"

#                 }
#             )

#     def data_source_target_mapping(self):
#         self.add_source()
#         self.add_destination()
#         self.add_transform()
#         self.add_mapping()

#     @property
#     def get_data_source_target_mapping(self):
#         self.data_source_target_mapping()
#         return self.db



### Source class

Inherited from Interface for the common methods and from Database for common variables

In [105]:
class Source(Interface, Database):
    def __init__(self):
        Database.__init__(self)

    # should be implemented - inherited from Interface
    def get_data_by_field(self, field_name):
        data = self.get
        for item in data:
            for key, value in item.items():
                if key == field_name:
                    return item
        return None

    @property
    def get(self):
        return self.get_data_source_target_mapping.get("source")

    def get_data_by_id(self, id):
        self.id = id
        data = self.get
        for x in data:
            if x.get("id") == self.id:
                return x
        return None

### Target class

Inherited from Interface for the common methods and from Database for common variables

In [106]:
class Target(Interface, Database):

    def __init__(self):
        Database.__init__(self)

    # should be implemented - inherited from Interface
    def get_data_by_field(self, field_name):
        data = self.get
        for item in data:
            for key, value in item.items():
                if key == field_name:
                    return item
        return None

    @property
    def get(self):
        return self.get_data_source_target_mapping.get("destination")

    def get_data_by_id(self, id):
        self.id = id
        data = self.get
        for x in data:
            if x.get("id").__str__() == self.id.__str__():
                return x
        return None

### Transform Class

Inherited from Interface for the common methods and from Database for common variables

In [107]:
class Transform(Interface, Database):

    def __init__(self):
        Database.__init__(self)

    # should be implemented - inherited from Interface
    def get_data_by_field(self, field_name):
        data = self.get
        for item in data:
            for key, value in item.items():
                if key == field_name:
                    return item
        return None

    @property
    def get(self):
        return self.get_data_source_target_mapping.get("transform", [])

    def get_data_by_id(self, id):
        self.id = id
        data = self.get
        for x in data:
            if x.get("id").__str__() == self.id.__str__():
                return x
        return None

### Mapping class

Inherited from Interface for the common methods and from Database for common variables

In [108]:
class Mappings(Interface, Database):

    def __init__(self):
        Database.__init__(self)

    @property
    def get(self):
        return self.get_data_source_target_mapping.get("mapping")

    def get_data_by_id(self, id):
        self.id = id
        data = self.get
        for x in data:
            if x.get("id").__str__() == self.id.__str__():
                return x
        return None

    def get_data_by_field(self, field_name):
        return None

### Format Class - JSON

Search the source data value inside a JSON file

In [109]:
class JsonQuery:
    def __init__(self, json_path, json_data):
        self.json_path = json_path
        self.json_data = json_data

    def get(self):
        jsonpath_expression = parse(self.json_path)
        match = jsonpath_expression.find(self.json_data)
        source_data_value = match[0].value
        return source_data_value

### Combine it All - STTM

In [110]:
class STTM:
    def __init__(self, input_json):
        self.input_json = input_json
        self.mapping_instance = Mappings()
        self.source_instance = Source()
        self.destination_instance = Target()
        self.transform_instance = Transform()
        self.look_up_mask = {i.name: i.value for i in TransformMask}
        self.json_data_transformed = {}
        self.to_table = {}

    def _get_mapping_data(self):
        return self.mapping_instance.get

    def _get_mapping_source_data(self):
        return self.source_instance.get

    def get_transformed_data(self):

        for mappings in self._get_mapping_data():

            """fetch the source mapping """
            mapping_source_id = mappings.get("mapping_source")
            mapping_destination_id = mappings.get("mapping_destination")
            mapping_transform_id = mappings.get("mapping_transform")
            mapping_table = mappings.get("destination_table")
            self.json_data_transformed["to_table"] = mapping_table
            print("@", mapping_table)

            mapping_source_data = self.source_instance.get_data_by_id(id=mapping_source_id)
            transform_data = self.transform_instance.get_data_by_id(id=mapping_transform_id)

            """Fetch Source  field Name"""
            source_field_name = mapping_source_data.get("source_field_name")

            """if field given is not present incoming json """
            if source_field_name not in self.input_json.keys():
                if mapping_source_data.get("is_required"):
                    raise Exception(
                        "Alert ! Field {} is not present in JSON please FIX mappings ".format(source_field_name))
                else:
                    pass

            else:
                source_data_value = JsonQuery(
                    json_path=mapping_source_data.get("source_field_mapping"),
                    json_data=self.input_json
                ).get()

                """check the data type for source if matches with what we have """
                if mapping_source_data.get("source_field_type") != type(source_data_value).__name__:
                    if source_data_value is not None:
                        _message = (
                            "Alert ! Source Field :{} Datatype has changed from {} to {} ".format(source_field_name,
                                                                                                  mapping_source_data.get(
                                                                                                      "source_field_type"),
                                                                                                  type(
                                                                                                      source_data_value).__name__))
                        print(_message)
                        raise Exception(_message)

                """Query and fetch the Destination | target """
                destination_mappings_json_object = self.destination_instance.get_data_by_id(
                    id=mappings.get("mapping_destination"))

                destination_field_name = destination_mappings_json_object.get("destination_field_name")
                destination_field_type = destination_mappings_json_object.get("destination_field_type")


                # destination_table = destination_mappings_json_object.get("destination_table")
                # if "to_table" in self.json_data_transformed:
                #   self.json_data_transformed["to_table"].append(destination_table)
                # else:
                #   self.json_data_transformed["to_table"] = [destination_table]
                # print("!!!!", destination_table)
                # # self.json_data_transformed["to_table"] = destination_table

                self.to_table[destination_field_name] = mapping_table

                dtypes = [str, float, list, int, set, dict]

                for dtype in dtypes:

                    """Datatype Conversion """
                    if destination_field_type == str(dtype.__name__):

                        """is source is none insert default value"""
                        if source_data_value is None:
                            self.json_data_transformed[destination_field_name] = dtype.__call__(
                                destination_mappings_json_object.get("default_value")
                            )

                        else:
                            """check if you have items to transform"""
                            if transform_data is not None:
                                """ check for invalid mask name """
                                if transform_data.get("transform_mask") not in list(self.look_up_mask.keys()):
                                    raise Exception(
                                        f"Specified Transform {transform_data.get('transform_mask')} is not available please select from following Options :{list(self.look_up_mask.keys())}")
                                else:
                                    mask_apply = self.look_up_mask.get(transform_data.get("transform_mask"))
                                    converted_dtype = dtype.__call__(source_data_value)
                                    mask = f'converted_dtype{mask_apply}'
                                    curated_value = eval(mask)
                                    self.json_data_transformed[destination_field_name] = curated_value

                            else:
                                self.json_data_transformed[destination_field_name] = dtype.__call__(source_data_value)

        return self.json_data_transformed, self.to_table

In [112]:
transformed_data = []
for item in data:
    helper = STTM(input_json=item)
    response = helper.get_transformed_data()
    transformed_data.append(response)
    print(response)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
@ dimRelease
@ dimGenre
@ dimType
@ dimSource
({'to_table': 'dimSource', 'Title': 'Stuart Little', 'US_Gross': '140015224', 'Worldwide_Gross': '298800000', 'US_DVD_Sales': '', 'Production_Budget': '105000000', 'Rotten_Tomatoes_Rating': '65', 'IMDB_Rating': '5.8', 'IMDB_Votes': '23226', 'Director': 'Rob Minkoff', 'Distributor': 'Sony Pictures', 'Release_Date': '36511', 'Major_Genre': 'Comedy', 'Creative_Type': 'Kids Fiction', 'Source': 'Based On Book/Short Story'}, {'Title': 'fact', 'US_Gross': 'fact', 'Worldwide_Gross': 'fact', 'US_DVD_Sales': 'fact', 'Production_Budget': 'fact', 'Rotten_Tomatoes_Rating': 'fact', 'IMDB_Rating': 'fact', 'IMDB_Votes': 'fact', 'Director': 'dimDirector', 'Distributor': 'dimDist', 'Release_Date': 'dimRelease', 'Major_Genre': 'dimGenre', 'Creative_Type': 'dimType', 'Source': 'dimSource'})
@ fact
@ fact
@ fact
@ fact
@ fact
@ fact
@ fact
@ fact
@ dimDirector
@ dimDist
@ dimRelease
@ dimGenre
@ d

In [113]:
response

({'to_table': 'dimSource',
  'Title': 'Zoolander',
  'US_Gross': '45172250',
  'Worldwide_Gross': '60780981',
  'US_DVD_Sales': '',
  'Production_Budget': '28000000',
  'Rotten_Tomatoes_Rating': '62',
  'IMDB_Rating': '6.4',
  'IMDB_Votes': '69296',
  'Director': 'Ben Stiller',
  'Distributor': 'Paramount Pictures',
  'Release_Date': '37162',
  'Major_Genre': 'Comedy',
  'Creative_Type': 'Contemporary Fiction',
  'Source': 'Original Screenplay'},
 {'Title': 'fact',
  'US_Gross': 'fact',
  'Worldwide_Gross': 'fact',
  'US_DVD_Sales': 'fact',
  'Production_Budget': 'fact',
  'Rotten_Tomatoes_Rating': 'fact',
  'IMDB_Rating': 'fact',
  'IMDB_Votes': 'fact',
  'Director': 'dimDirector',
  'Distributor': 'dimDist',
  'Release_Date': 'dimRelease',
  'Major_Genre': 'dimGenre',
  'Creative_Type': 'dimType',
  'Source': 'dimSource'})

In [114]:
df = pd.DataFrame(transformed_data)
df

Unnamed: 0,0,1
0,"{'to_table': 'dimSource', 'Title': 'The Land G...","{'Title': 'fact', 'US_Gross': 'fact', 'Worldwi..."
1,"{'to_table': 'dimSource', 'Title': 'First Love...","{'Title': 'fact', 'US_Gross': 'fact', 'Worldwi..."
2,"{'to_table': 'dimSource', 'Title': 'I Married ...","{'Title': 'fact', 'US_Gross': 'fact', 'Worldwi..."
3,"{'to_table': 'dimSource', 'Title': 'Let'S Talk...","{'Title': 'fact', 'US_Gross': 'fact', 'Worldwi..."
4,"{'to_table': 'dimSource', 'Title': 'Slam', 'US...","{'Title': 'fact', 'US_Gross': 'fact', 'Worldwi..."
...,...,...
3190,"{'to_table': 'dimSource', 'Title': 'The Young ...","{'Title': 'fact', 'US_Gross': 'fact', 'Worldwi..."
3191,"{'to_table': 'dimSource', 'Title': 'The Young ...","{'Title': 'fact', 'US_Gross': 'fact', 'Worldwi..."
3192,"{'to_table': 'dimSource', 'Title': 'Zathura', ...","{'Title': 'fact', 'US_Gross': 'fact', 'Worldwi..."
3193,"{'to_table': 'dimSource', 'Title': 'Zero Effec...","{'Title': 'fact', 'US_Gross': 'fact', 'Worldwi..."


In [116]:

import pandas as pd

# Assuming `transformed_data` contains the transformed data as a list of dictionaries

# Convert the list of dictionaries to a DataFrame
df_transformed = pd.DataFrame(transformed_data)

# Save as CSV file
Data_Warehouse = "data_warehouse.csv"
df_transformed.to_csv(Data_Warehouse, index=False)
