In [1]:
import json
import geojson
from data_fusion import DataFusion
import pandas as pd
import folium
import pprint
from IPython.display import display, HTML
from IPython.display import IFrame

# Data Fusion Pipeline

The following example illustrates the data fusion process used to explore the relationship between aerial infrared (IR) defect analysis and short-term photovoltaic (PV) performance in PV systems. First, we read in the defect mapping, which maps each defect ID to its defect name, and load it into a dictionary that is then used to instantiate the master data fusion class.

In [2]:
# Load the defect-id -> defect-name lookup table from its JSON file
with open("zeitvew_defect_mapping.json") as mapping_file:
    defect_mapping_dict = json.load(mapping_file)

# Pretty-print the lookup table so the available defect categories are visible
pprint.pprint(defect_mapping_dict)

{'1.1': 'Single Hotspot <10C',
 '1.2': 'Single Hotspot 10C-20C',
 '1.3': 'Single Hotspot >20C',
 '10': 'Damaged Module',
 '10.1': 'Broken Glass',
 '11': 'Isolated/ Underperforming Module',
 '12': 'Sub-string short circuit',
 '13': 'Misaligned Modules',
 '14': 'Suspected PID',
 '15': 'Soiling',
 '16': 'Delamination',
 '17': 'String Off-line',
 '2.1': 'Multi-Hotspots <10C',
 '2.2': 'Multi-Hotspots 10C-20C',
 '2.3': 'Multi-Hotspots >20C',
 '3': 'Diode Bypass',
 '4': 'Short Circuit',
 '5': 'Vegetation',
 '6': 'Junction Box',
 '7': 'Missing Module',
 '8': 'Shadow',
 '9': 'Inter-row/ Table Shading'}


In [3]:
# Read in the associated metadata for the sites
with open("metadata_dict.json") as json_file:
    metadata_dict = json.load(json_file)

# Select the metadata record specifically for NREL RSF II.
# next() stops at the first match instead of building a full list, and the
# explicit check below replaces the opaque IndexError that `[...][0]` would
# raise when no record carries the requested system name.
target_system_name = "NREL RSF II"
metadata = next(
    (record for record in metadata_dict if record['system_name'] == target_system_name),
    None,
)
if metadata is None:
    raise ValueError(
        f'No entry with system_name "{target_system_name}" found in metadata_dict.json'
    )

pprint.pprint(metadata)

{'defect_geojson_path': './geojsons/NREL-zeitview-report.geojson',
 'latitude': 39.7409,
 'longitude': -105.171,
 'mount': 'fixed',
 'scan_date': '6/21/2023',
 'site_layout_geojson_path': './geojsons/NREL_RSF_II.geojson',
 'system_id': 1283,
 'system_name': 'NREL RSF II',
 'system_number_modules': 1866,
 'time_series_data_path': './full_time_series/C1283.csv'}


In [4]:
# Create the master data fusion object from the defect-id -> defect-name mapping;
# this instance is reused by every later step of the pipeline
data_fusion = DataFusion(
    defect_mapping_dict,
    by_site=True,
)

## 1. Read GeoJSON Files

Now, we read in the site layout and aerial IR defect analysis GeoJSON files.

In [5]:
# Load the site layout GeoJSON and the aerial defect GeoJSON from the
# file paths recorded in the metadata
with open(metadata["site_layout_geojson_path"]) as site_file:
    site_dict = geojson.load(site_file)
with open(metadata["defect_geojson_path"]) as defect_file:
    aerial_defect_dict = geojson.load(defect_file)

# Print the contents of the site layout GeoJSON
pprint.pprint(site_dict)

{"features": [{"geometry": {"coordinates": [[[-105.170311, 39.740977], [-105.170376, 39.74113], [-105.171912, 39.740811], [-105.171864, 39.740657], [-105.170311, 39.740977]]], "type": "Polygon"}, "properties": {}, "type": "Feature"}, {"geometry": {"coordinates": [[[-105.17083, 39.740714], [-105.170881, 39.74085], [-105.170934, 39.740839], [-105.170981, 39.740831], [-105.171057, 39.740821], [-105.171007, 39.740672], [-105.17083, 39.740714]]], "type": "Polygon"}, "properties": {}, "type": "Feature"}, {"geometry": {"coordinates": [[[-105.170221, 39.740371], [-105.170226, 39.74054], [-105.171472, 39.740544], [-105.171467, 39.740374], [-105.170221, 39.740371]]], "type": "Polygon"}, "properties": {}, "type": "Feature"}, {"geometry": {"coordinates": [[[-105.170772, 39.740588], [-105.170808, 39.740675], [-105.170986, 39.74064], [-105.170945, 39.740552], [-105.170883, 39.740562], [-105.170832, 39.740575], [-105.170772, 39.740588]]], "type": "Polygon"}, "properties": {}, "type": "Feature"}], "ty

Additionally, the site and aerial GeoJSON layers can be mapped onto a satellite image and saved as an HTML file. The green layer represents the site GeoJSON and the red layer represents the aerial IR defect analysis GeoJSON. Hovering over the red defect blocks displays the defect name.

Please note that we are only looking at the NREL RSF II installation (in green), not every installation on the NREL campus, so some defects are not mapped to a particular installation here.

In [6]:
site_coords = (metadata['latitude'], metadata['longitude'])
# Single source of truth for the map's output file: the same path is used for
# saving the map and for embedding it below, so the two cannot drift apart.
folium_file_path = "NREL_RSFII_folium_map.html"

# Look up each defect's name from its defect id and store it on the feature's
# properties, so that defect_name is available for the folium tooltip
for defect in aerial_defect_dict["features"]:
    defect_id = defect["properties"]["defect_type_id"]
    defect_name = defect_mapping_dict[defect_id]
    defect["properties"]["defect_name"] = defect_name

# Generate html
folium_map = data_fusion.generate_folium_graphic(metadata['system_name'],
                                                 site_coords,
                                                 site_dict,
                                                 aerial_defect_dict,
                                                 zoom=17)
folium_map.save(folium_file_path)

# Embed the saved map in the notebook output
IFrame(src=folium_file_path, width=800, height=350)

## 2. Fuse Aerial IR Defect Analysis with Site 

After reading in the GeoJSON files, we merge the site and aerial defect information to isolate defects by inverter block. The merge returns a dataframe in which each row is a detected defect, mapped to a particular area in the site dictionary.

In [7]:
# Merge the defect dictionary with the site layout dictionary using the DataFusion class
defect_df = data_fusion.merge_aerial_site_dictionary(
    aerial_defect_dict,
    site_dict,
    defect_mapping_dict,
    metadata,
)
# Preview the first ten rows of the merged result
defect_df.head(10)

Unnamed: 0,system_id,defect_id,defect_name,inv_block_polygon,defect_polygon,defect_area
0,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.170696 39.741033, -105.170677 3...",1.83e-10
1,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.170883 39.740995, -105.170864 3...",1.72e-10
2,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.171811 39.740786, -105.171793 3...",1.74e-10
3,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.170861 39.74099, -105.170842 39...",1.83e-10
4,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.171796 39.740798, -105.171777 3...",1.72e-10
5,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.171805 39.740824, -105.171786 3...",1.72e-10
6,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.171026 39.740947, -105.171008 3...",1.635e-10
7,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.171051 39.74096, -105.171032 39...",1.72e-10
8,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.170496 39.741092, -105.170478 3...",1.56e-10
9,1283,3,Diode Bypass,"POLYGON ((-105.170311 39.740977, -105.170376 3...","POLYGON ((-105.171425 39.740884, -105.171406 3...",1.72e-10


## 3. Aggregate defects and get defect percentage

Now, we normalize the site and defect area, and subsequently calculate the percentage of the site that contains a particular defect. By finding the defect percentage, we can make comparisons of defects easily across the site.

In [8]:
# Aggregate the per-defect rows, passing in the site's module count from the metadata
pct_defect_df = data_fusion.aggregate_defects(
    defect_df,
    metadata['system_number_modules'],
)

# Show the distinct rows of the summary columns
summary_columns = ["system_id", "defect_name", "defect_count", "defect_percentage"]
pct_defect_df[summary_columns].drop_duplicates()

Unnamed: 0,system_id,defect_name,defect_count,defect_percentage
0,1283,Diode Bypass,30,1.607717
12,1283,Soiling,3,0.160772
26,1283,Isolated/ Underperforming Module,20,1.071811
41,1283,String Off-line,6,0.321543


## 4. Plot Short-Term Performance Time Series

After fusing and aggregating the site and aerial defect analysis, we can plot the site's time series data streams to analyze its short-term behavior in the presence of defects. Here, we define short-term defects to be "Misaligned Modules" or "String Off-line". We can also plot the short-term PV performance using either the individual AC power data streams or an aggregated AC power data stream.

In [9]:

# Load the site's time series; the first column becomes the index and
# pandas is asked to parse that index as dates
time_series_df = pd.read_csv(
    metadata['time_series_data_path'],
    index_col=0,
    parse_dates=True,
)

# Keep only the columns whose names contain 'ac_power'
ac_power_columns = [column for column in time_series_df.columns if 'ac_power' in column]
time_series_df = time_series_df[ac_power_columns]

# Build the short-term performance figure around the scan date
fig = data_fusion.visualize_short_term_performance(
    time_series_df,
    metadata['scan_date'],
    day_window=7,
)

# Write the figure to an HTML file and embed that file in the notebook output
short_term_html_path = 'NREL_RSFII_short_term_performance.html'
fig.write_html(short_term_html_path)

IFrame(src=short_term_html_path, width=1100, height=500)