In [25]:
import pandas as pd

# Load the battery cell table from the local CSV export.
TumData = pd.read_csv('Sources/TumDataBattINFO.csv')

# Coerce the measurement columns to numeric dtype; entries that cannot be
# parsed as numbers become NaN (errors='coerce').
for _numeric_column in (
    'Length (mm)',
    'Height (mm)',
    'Width (mm)',
    'Diameter (mm)',
    'Cycle Life',
):
    TumData[_numeric_column] = pd.to_numeric(TumData[_numeric_column], errors='coerce')

#DischargingCurrent
#MaximumContinuousDischargingCurrent

In [20]:
import json
import pandas as pd
import numpy as np


def extract_valid_citations(json_file_path,cell_name):
    """Collect the ids of validated citation hits that belong to one cell.

    Args:
        json_file_path: Path to a JSON file holding a list of hit records.
            Each record is read for its 'state', 'filename' and '_id' keys.
        cell_name: Cell identifier to match against. A record matches when
            its 'filename', with every '.json' occurrence removed, is
            contained in ``cell_name``.

    Returns:
        List with the '_id' of every matching record whose 'state' equals
        'valid', in file order.

    Raises:
        KeyError: If a record with state 'valid' lacks 'filename', or a
            matching record lacks '_id'.
    """
    # JSON text is UTF-8 by specification; be explicit so the result does not
    # depend on the platform's locale encoding.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    return [
        entry['_id']
        for entry in data
        if entry.get('state') == 'valid'
        and entry['filename'].replace('.json', '') in cell_name
    ]

def fixPG_DOI_ID(citations, pg_doi=None, pg_s2=None):
    """Turn raw citation ids into JSON-LD ``{"@id": <url>}`` references.

    An id made only of digits is looked up as a 'pgid': first in the DOI
    table and, when no DOI is recorded there, in the Semantic Scholar table.
    Any other id is treated as a DOI whose '/' characters were stored as '_'.

    Args:
        citations: Iterable of citation ids. Values are converted with
            ``str`` before use.
        pg_doi: Optional DataFrame with 'pgid' and 'doi' columns. When None,
            'Sources/pg_doi.csv' is read.
        pg_s2: Optional DataFrame with 'pgid' and 's2id' columns. When None,
            'Sources/pg_s2.csv' is read.

    Returns:
        List of ``{"@id": url}`` dicts in input order. A numeric id that
        resolves in neither table is reported with ``print`` and left out
        instead of aborting the whole run.
    """
    citations = [str(citation) for citation in citations]
    if not citations:
        # Nothing to resolve, so the lookup tables are not needed at all.
        return []

    # Passing preloaded tables lets a caller avoid re-reading the CSV files
    # on every call.
    if pg_doi is None:
        pg_doi = pd.read_csv('Sources/pg_doi.csv')
    if pg_s2 is None:
        pg_s2 = pd.read_csv('Sources/pg_s2.csv')

    def first_known(table, pgid, column):
        """Return the first non-missing ``column`` value for ``pgid`` as str, or None."""
        hits = table.loc[table['pgid'] == pgid, column].dropna()
        return None if hits.empty else str(hits.iloc[0])

    citations_only_DOI = []
    for citation in citations:
        if not citation.isdigit():
            url = "https://doi.org/" + citation.replace('_', '/')
        else:
            pgid = int(citation)
            doi = first_known(pg_doi, pgid, 'doi')
            if doi is not None:
                url = "https://doi.org/" + doi.replace('_', '/')
            else:
                s2id = first_known(pg_s2, pgid, 's2id')
                if s2id is None:
                    print(f'No DOI or Semantic Scholar id found for pgid {citation}; citation skipped')
                    continue
                url = "https://www.semanticscholar.org/paper/" + s2id
        citations_only_DOI.append({"@id": url})
    return citations_only_DOI

def retrieveMetadataForCitations(citations):
    """Placeholder for attaching metadata to citation references.

    Args:
        citations: Citation references to enrich (currently unused).

    Raises:
        NotImplementedError: Always; no implementation exists yet.
    """
    # Fail explicitly: there is no local value to return, and relying on a
    # global that happens to exist in the kernel would be hidden state.
    raise NotImplementedError("retrieveMetadataForCitations is not implemented yet")


In [3]:
# Resolve the validated citations of one example cell into "@id" references.
valid_citations = fixPG_DOI_ID(
    extract_valid_citations('Sources/valid_hits.json', 'Panasonic_NCR18650BF')
)


In [34]:
import json

def check_if_NaN(value):
    """Return the string "NaN" for a missing value, otherwise the value unchanged."""
    if pd.isna(value):
        return "NaN"
    return value

def check_if_NaN_1000(value):
    """Return the string "NaN" for a missing value, otherwise the value divided by 1000."""
    if pd.isna(value):
        return "NaN"
    return value/1000

# Spreadsheet that every exported value is attributed to via "schema:citation".
_CELL_DATABASE_URL = "https://github.com/TUMFTM/TruckBatteryDesign/raw/main/Step1_cell_selection/inputs/CellDatabase_v6.xlsx"


def _conventional_property(property_type, value, unit=None):
    """Build one "hasProperty" entry; the unit key is left out when ``unit`` is None."""
    entry = {
        "@type": [property_type, "ConventionalProperty"],
        "hasNumericalPart": {
            "@type": "Real",
            "hasNumericalValue": value
        }
    }
    if unit is not None:
        entry["hasMeasurementUnit"] = unit
    entry["schema:citation"] = {"@id": _CELL_DATABASE_URL}
    return entry


def create_json_files(df):
    """Write one battery-cell JSON-LD file per row of ``df`` into 'BatteryTypeJson/'.

    For every row a document is built with the cell name, manufacturer,
    positive-electrode chemistry, the citations found for the part number
    (via ``extract_valid_citations`` and ``fixPG_DOI_ID``), an optional case
    type, and a list of properties. Missing numeric values are written as the
    string "NaN" (see ``check_if_NaN``). Weight and the '(mm)' dimensions are
    divided by 1000 before being written with kilogram / metre units.

    Args:
        df: DataFrame with the columns 'Format', 'Part #', 'Company Name',
            'Chemistry', 'Cell Shape', 'Max Capacity (AH)', 'Cycle Life',
            'Nominal Voltage (V)', 'Open Circuit Voltage (V)',
            'Cut Off Voltage (V)', 'Standard Discharge Current (A)',
            'Max Constant Discharge Current (A)', 'Weight (gr)',
            'Standard Charge Current (A)', 'Fast/Quick Charge Current',
            'Height (mm)', 'Diameter (mm)', 'Width (mm)' and 'Length (mm)'.
            The frame is only read, never modified.

    Returns:
        None. The output file is named '<Company Name>_<Part #>.json' with
        '/' and ' ' replaced by '_'.
    """
    import os  # local import: the defining cell itself only imports json

    output_dir = 'BatteryTypeJson'
    os.makedirs(output_dir, exist_ok=True)

    # (property type, source column, unit or None, converter)
    # NOTE(review): UpperVoltageLimit is filled from 'Open Circuit Voltage (V)'
    # exactly as before -- confirm that this column mapping is intended.
    common_properties = (
        ("RatedCapacity", 'Max Capacity (AH)', "emmo:AmpereHour", check_if_NaN),
        ("CycleLife", 'Cycle Life', None, check_if_NaN),
        ("NominalVoltage", 'Nominal Voltage (V)', "emmo:Volt", check_if_NaN),
        ("UpperVoltageLimit", 'Open Circuit Voltage (V)', "emmo:Volt", check_if_NaN),
        ("LowerVoltageLimit", 'Cut Off Voltage (V)', "emmo:Volt", check_if_NaN),
        ("DischargingCurrent", 'Standard Discharge Current (A)', "emmo:Ampere", check_if_NaN),
        ("MaximumContinuousDischargingCurrent", 'Max Constant Discharge Current (A)', "emmo:Ampere", check_if_NaN),
        ("Mass", 'Weight (gr)', "emmo:Kilogram", check_if_NaN_1000),
        ("ChargingCurrent", 'Standard Charge Current (A)', "emmo:Ampere", check_if_NaN),
        ("MaximumContinuousChargingCurrent", 'Fast/Quick Charge Current', "emmo:Ampere", check_if_NaN),
    )
    cylindrical_dimensions = (
        ("Height", 'Height (mm)', "emmo:Metre", check_if_NaN_1000),
        ("Diameter", 'Diameter (mm)', "emmo:Metre", check_if_NaN_1000),
    )
    flat_dimensions = (
        ("Height", 'Height (mm)', "emmo:Metre", check_if_NaN_1000),
        ("Width", 'Width (mm)', "emmo:Metre", check_if_NaN_1000),
        ("Length", 'Length (mm)', "emmo:Metre", check_if_NaN_1000),
    )

    for _, row in df.iterrows():
        # str() keeps a missing (NaN) format from crashing on .lower(); such
        # rows simply get neither a case nor dimension properties.
        cell_format = str(row['Format']).lower()
        part_number = row['Part #']
        is_cylindrical = cell_format == "cylindricalcase"
        is_flat = cell_format in ("pouchcase", "prismaticcase")

        data = {
            "@context": "https://w3id.org/emmo/domain/battery/context",
            "schema:name": part_number,
            "schema:manufacturer": {
                "schema:name": row['Company Name']
            },
            "hasPositiveElectrode": {
                "@type": "PositiveElectrode",
                "hasActiveMaterial": {
                    "@type": check_if_NaN(row['Chemistry']),
                    "schema:citation": {"@id": _CELL_DATABASE_URL}
                }
            }
        }

        valid_citations = fixPG_DOI_ID(
            extract_valid_citations('Sources/valid_hits.json', part_number)
        )
        if valid_citations:
            data["schema:subjectOf"] = valid_citations

        if is_cylindrical:
            # The case type is derived from the size code embedded in the
            # part number; the first code found wins.
            for size_code in ("18650", "21700", "26650"):
                if size_code in str(part_number):
                    data["hasCase"] = [{"@type": "R" + size_code}]
                    break
        elif is_flat:
            # check_if_NaN keeps a missing shape from being serialized as a
            # bare NaN token, which is not valid JSON.
            data["hasCase"] = [{"@type": check_if_NaN(row['Cell Shape'])}]

        property_specs = list(common_properties)
        if is_cylindrical:
            property_specs.extend(cylindrical_dimensions)
        elif is_flat:
            property_specs.extend(flat_dimensions)

        data["hasProperty"] = [
            _conventional_property(property_type, convert(row[column]), unit)
            for property_type, column, unit, convert in property_specs
        ]

        # Sanitize the whole stem so a '/' in the company name cannot point
        # outside the output directory.
        file_stem = (str(row['Company Name']) + '_' + str(part_number)).replace('/', '_').replace(' ', '_')
        file_name = output_dir + '/' + file_stem + '.json'
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # must be fixed to UTF-8 rather than left to the platform default.
        with open(file_name, 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file, ensure_ascii=False, indent=4)

# Create the JSON files: one file per row of TumData, written below 'BatteryTypeJson/'.
create_json_files(TumData)
# Disabled debugging aid; it only works if create_json_files is changed to return its data dict.
#single_data = create_json_files(TumData)
#print(json.dumps(single_data, indent=4))


In [None]:

import os
import json
import requests
from urllib.parse import urlparse, unquote

def is_doi(url):
    """Report whether the network location of ``url`` contains 'doi.org'."""
    return 'doi.org' in urlparse(url).netloc

def get_doi_from_url(url):
    """Return the percent-decoded path of a doi.org URL, or None for any other URL."""
    parts = urlparse(url)
    if 'doi.org' not in parts.netloc:
        return None
    # The path minus its leading slashes is the DOI itself.
    return unquote(parts.path.lstrip('/'))

def fetch_doi_metadata(doi, timeout=10):
    """Fetch CSL-JSON metadata for a DOI through content negotiation on doi.org.

    Args:
        doi: The DOI to resolve, e.g. '10.1016/j.x'.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run.

    Returns:
        The decoded JSON metadata, or None when the request fails, the server
        answers with a status other than 200, or the body is not valid JSON.
        Failures are reported with ``print``.
    """
    headers = {
        'Accept': 'application/vnd.citationstyles.csl+json'
    }
    url = f'https://doi.org/{doi}'
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json()
        print(f'Failed to fetch metadata for DOI {doi}: HTTP {response.status_code}')
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a 200 response whose body is not JSON on requests
        # versions where the decode error is not a RequestException.
        print(f'Error fetching DOI metadata: {e}')
    return None

def update_json_file(file_path):
    """Enrich the DOI entries under 'schema:subjectOf' of one JSON file in place.

    For every entry whose '@id' is a doi.org URL, the DOI metadata is fetched
    and 'schema:headline', 'schema:author', 'schema:description' and, when a
    publication year is available, 'schema:datePublished' are set on the
    entry. The file is rewritten only if at least one entry was enriched.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to update.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    updated = False

    for entry in data.get('schema:subjectOf', []):
        doi_url = entry.get('@id', '')
        if not is_doi(doi_url):
            continue
        doi = get_doi_from_url(doi_url)
        if not doi:
            continue
        metadata = fetch_doi_metadata(doi)
        if not metadata:
            continue

        entry['schema:headline'] = metadata.get('title', '')

        author_names = []
        for author in metadata.get('author') or []:
            full_name = f"{author.get('given', '')} {author.get('family', '')}".strip()
            if not full_name:
                # Institutional authors carry no given/family parts.
                # NOTE(review): 'literal' is the CSL-JSON field and 'name' the
                # Crossref one -- confirm which the service actually returns.
                full_name = str(author.get('literal') or author.get('name') or '').strip()
            if full_name:
                # Skipping empty names avoids "A and  and B" in the joined string.
                author_names.append(full_name)
        entry['schema:author'] = ' and '.join(author_names)

        entry['schema:description'] = metadata.get('abstract', '')

        # 'published' may be absent, and its 'date-parts' missing or empty;
        # only the first component (the year) of the first date is stored.
        published = metadata.get('published')
        date_parts = published.get('date-parts') if isinstance(published, dict) else None
        if date_parts and date_parts[0]:
            entry['schema:datePublished'] = date_parts[0][0]

        updated = True

    if updated:
        # ensure_ascii=False keeps non-ASCII text readable, as in the files
        # originally written by create_json_files.
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4, ensure_ascii=False)
        print(f'Updated file: {file_path}')

def process_folder(folder_path):
    """Run update_json_file on every '.json' entry directly inside ``folder_path``."""
    json_names = (name for name in os.listdir(folder_path) if name.endswith('.json'))
    for json_name in json_names:
        update_json_file(os.path.join(folder_path, json_name))

# Entry point: enrich every JSON file in the output folder with DOI metadata.
# The guard keeps this from running when the code is imported as a module.
if __name__ == '__main__':
    folder_path = 'BatteryTypeJson'  # Folder holding the JSON files to update; replace with your folder path
    process_folder(folder_path)