# Fusion - Metadata Creation

In [1]:
from fusion import Fusion
import pandas as pd

## Establish the connection

In [2]:
# Instantiate the Fusion client with no arguments; every later cell in this
# notebook calls methods on this `fusion` object.
# NOTE(review): credentials/connection settings are presumably picked up from
# the SDK's defaults since none are passed here — confirm for your environment.
fusion = Fusion()

## Show the available functionality

In [3]:
# Bare expression: display the client's repr, which lists its available methods.
fusion

Fusion object 
Available methods:
+------------------------------+--------------------------------------------------------------------------------------------------+
| attribute                    | Instantiate an Attribute object with this client for metadata creation.                          |
| attributes                   | Instantiate an Attributes object with this client for metadata creation.                         |
| catalog_resources            | List the resources contained within the catalog, for example products and datasets.              |
| create_dataset_lineage       | Upload lineage to a dataset.                                                                     |
| dataset                      | Instantiate a Dataset object with this client for metadata creation.                             |
| dataset_resources            | List the resources available for a dataset, currently this will always be a datasetseries.       |
| datasetmember_resources      | List the 

## Create Product

### Create a product object

In [4]:
# Gather the product metadata in one mapping so the fields can be reviewed
# together, then pass them to the client as keyword arguments.
product_fields = {
    "identifier": "PYFUSION_PRODUCT",
    "title": "PyFusion Product",
    "description": "A product created using the PyFusion SDK.",
    "short_abstract": "A product created using the PyFusion SDK.",
    "is_restricted": True,
    "maintainer": "J.P. Morgan Fusion",
    "region": "Global",
    "publisher": "J.P. Morgan",
    "theme": "Research",
}
my_product = fusion.product(**product_fields)

# Display the object so the fields that were not set explicitly are visible too.
my_product

Product(
identifier='PYFUSION_PRODUCT',
 title='PyFusion Product',
 category=None,
 short_abstract='A product created using the PyFusion SDK.',
 description='A product created using the PyFusion SDK.',
 is_active=True,
 is_restricted=True,
 maintainer=['J.P. Morgan Fusion'],
 region=['Global'],
 publisher='J.P. Morgan',
 sub_category=None,
 tag=None,
 delivery_channel=['API'],
 theme='Research',
 release_date=None,
 language='English',
 status='Available',
 image='',
 logo='',
 dataset=None
)

### Upload to catalog

In [None]:
# Call create() on the product object built in the previous cell; per the
# section heading this uploads the product to the catalog.
my_product.create()

## Create Dataset

### Create a dataset object

In [5]:
# Gather the dataset metadata in one mapping, then pass it to the client as
# keyword arguments. `product` refers to the product identifier used earlier
# in this notebook.
dataset_fields = {
    "identifier": "PYFUSION_DATASET",
    "title": "PyFusion Dataset",
    "description": "A dataset created using the PyFusion SDK.",
    "is_restricted": True,
    "maintainer": "J.P. Morgan Fusion",
    "region": "Global",
    "publisher": "J.P. Morgan",
    "product": "PYFUSION_PRODUCT",
    "is_raw_data": False,
}
my_dataset = fusion.dataset(**dataset_fields)

# Display the object so the fields that were not set explicitly are visible too.
my_dataset

Dataset(
identifier='PYFUSION_DATASET',
 title='PyFusion Dataset',
 category=None,
 description='A dataset created using the PyFusion SDK.',
 frequency='Once',
 is_internal_only_dataset=False,
 is_third_party_data=True,
 is_restricted=True,
 is_raw_data=False,
 maintainer='J.P. Morgan Fusion',
 source=None,
 region=['Global'],
 publisher='J.P. Morgan',
 product=['PYFUSION_PRODUCT'],
 sub_category=None,
 tags=None,
 created_date=None,
 modified_date=None,
 delivery_channel=['API'],
 language='English',
 status='Available',
 type_='Source',
 container_type='Snapshot-Full',
 snowflake=None,
 complexity=None,
 is_immutable=None,
 is_mnpi=None,
 is_pci=None,
 is_pii=None,
 is_client=None,
 is_public=None,
 is_internal=None,
 is_confidential=None,
 is_highly_confidential=None,
 is_active=None,
 owners=None,
 application_id=None
)

In [None]:
# Call create() on the dataset object built in the previous cell.
# NOTE(review): presumably this uploads the dataset to the catalog and expects
# the referenced product (PYFUSION_PRODUCT) to exist already — confirm against
# the SDK documentation.
my_dataset.create()

## Create Attributes

### Retrieve template for attributes

In [6]:
# Obtain an Attributes object from the client and render it as a DataFrame;
# the resulting frame serves as the template that is exported and edited next.
attributes_template = fusion.attributes()
attributes_df = attributes_template.to_dataframe()

# Show the template row and its column names.
attributes_df

Unnamed: 0,identifier,index,dataType,title,description,isDatasetKey,source,sourceFieldId,isInternalDatasetKey,isExternallyVisible,unit,multiplier,isPropagationEligible,isMetric,availableFrom,deprecatedFrom,term,dataset,attributeType
0,example_attribute,0,String,Example Attribute,Example Attribute,False,,example_attribute,,True,,1.0,,,,,bizterm1,,


### Download the template as CSV and edit it

In [7]:
# Export the attribute template to attributes.csv so it can be edited by hand.
# The file is opened in exclusive-create mode ('x'): if attributes.csv already
# exists (for example, it has been edited after an earlier run), it is kept
# instead of being overwritten with the blank template. Without this guard a
# "Restart & Run All" would silently discard the manual edits that the next
# cell reads back. Delete attributes.csv to regenerate a fresh template.
try:
    attributes_df.to_csv('attributes.csv', index=False, mode='x')
except FileExistsError:
    print("attributes.csv already exists; keeping the existing (edited) file.")

### Convert to attributes list

In [8]:
# Read attributes.csv back into a DataFrame. The file is the exported template
# after manual editing, which is why the displayed frame can contain more rows
# than the template did.
attributes = pd.read_csv('attributes.csv')
# Display the edited attribute definitions before converting them.
attributes

Unnamed: 0,identifier,index,dataType,title,description,isDatasetKey,source,sourceFieldId,isInternalDatasetKey,isExternallyVisible,unit,multiplier,isPropogationEligible,isMetric,availableFrom,deprecatedFrom,term,dataset,attributeType
0,example_attribute0,0,String,Example Attribute 0,Example Attribute 0,False,,example_attribute 0,,True,,1.0,,,,,bizterm1,,
1,example_attribute1,1,String,Example Attribute 1,Example Attribute 1,False,,example_attribute 1,,True,,1.0,,,,,bizterm1,,
2,example_attribute2,2,String,Example Attribute 2,Example Attribute 2,False,,example_attribute 2,,True,,1.0,,,,,bizterm1,,


In [9]:
# Start from an Attributes object supplied by the client and populate it from
# the edited DataFrame.
attributes_builder = fusion.attributes()
attributes_list = attributes_builder.from_object(attributes)

# Display the converted attributes to verify them before upload.
attributes_list

[
('example_attribute0', 0, <Types.String: 1>, 'Example Attribute 0', 'Example Attribute 0', False, None, 'example_attribute_0', None, True, None, 1.0, None, None, None, None, 'bizterm1', None, None),
 ('example_attribute1', 1, <Types.String: 1>, 'Example Attribute 1', 'Example Attribute 1', False, None, 'example_attribute_1', None, True, None, 1.0, None, None, None, None, 'bizterm1', None, None),
 ('example_attribute2', 2, <Types.String: 1>, 'Example Attribute 2', 'Example Attribute 2', False, None, 'example_attribute_2', None, True, None, 1.0, None, None, None, None, 'bizterm1', None, None)
]

### Upload attributes to dataset on catalog

In [None]:
# Call create() on the converted attributes, targeting PYFUSION_DATASET; per the
# section heading this uploads the attributes to that dataset on the catalog.
attributes_list.create(dataset="PYFUSION_DATASET")

### Upload a file

In [11]:
# Load sample.csv purely to preview the data that the next cell uploads.
# NOTE(review): no cell shown in this notebook creates sample.csv, so it
# presumably must already exist in the working directory — confirm.
file_df = pd.read_csv('sample.csv')
# Display the sample rows; the column names should line up with the attribute
# identifiers defined earlier.
file_df

Unnamed: 0,example_attribute0,example_attribute1,example_attribute2
0,A,A,A
1,B,B,B
2,C,C,C


In [None]:
# Upload the local sample file to the dataset that carries the attribute schema.
# NOTE(review): dt_str looks like a YYYYMMDD date for the uploaded series —
# confirm against the SDK documentation.
sample_path = 'sample.csv'
fusion.upload(path=sample_path, dataset="PYFUSION_DATASET", dt_str="20241025")

## Create Raw Dataset

In [7]:
# Gather the raw dataset's metadata in one mapping, then pass it to the client
# as keyword arguments. Unlike the earlier dataset in this notebook, this one
# sets is_raw_data to True.
raw_dataset_fields = {
    "identifier": "PYFUSION_RAW_DATASET",
    "title": "PyFusion Raw Dataset",
    "description": "A dataset created using the PyFusion SDK.",
    "is_restricted": True,
    "maintainer": "J.P. Morgan Fusion",
    "region": "Global",
    "publisher": "J.P. Morgan",
    "product": "PYFUSION_PRODUCT",
    "is_raw_data": True,
}
my_raw_dataset = fusion.dataset(**raw_dataset_fields)

# Display the object so the fields that were not set explicitly are visible too.
my_raw_dataset

Dataset(
identifier='PYFUSION_RAW_DATASET',
 title='PyFusion Raw Dataset',
 category=None,
 description='A dataset created using the PyFusion SDK.',
 frequency='Once',
 is_internal_only_dataset=False,
 is_third_party_data=True,
 is_restricted=True,
 is_raw_data=True,
 maintainer='J.P. Morgan Fusion',
 source=None,
 region=['Global'],
 publisher='J.P. Morgan',
 product=['PYFUSION_PRODUCT'],
 sub_category=None,
 tags=None,
 created_date=None,
 modified_date=None,
 delivery_channel=['API'],
 language='English',
 status='Available',
 type_='Source',
 container_type='Snapshot-Full',
 snowflake=None,
 complexity=None,
 is_immutable=None,
 is_mnpi=None,
 is_pci=None,
 is_pii=None,
 is_client=None,
 is_public=None,
 is_internal=None,
 is_confidential=None,
 is_highly_confidential=None,
 is_active=None,
 owners=None,
 application_id=None
)

In [None]:
# Call create() on the raw dataset object built in the previous cell.
# NOTE(review): presumably this uploads the dataset to the catalog, as with the
# earlier create() calls — confirm against the SDK documentation.
my_raw_dataset.create()

## Upload data without schema

In [None]:
# Upload the same sample file to the raw dataset; no attributes are registered
# for this dataset anywhere in the notebook, so the data goes up without a schema.
# NOTE(review): dt_str looks like a YYYYMMDD date for the uploaded series —
# confirm against the SDK documentation.
sample_path = 'sample.csv'
fusion.upload(path=sample_path, dataset="PYFUSION_RAW_DATASET", dt_str="20241025")