# Test Notebook für Funktionen und komplexe Analysen

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import langdetect
import re

from collections import Counter
from collections import OrderedDict
from langdetect import detect

import torch

import sys
import os
import warnings
warnings.filterwarnings('ignore')
sys.path.append(os.path.abspath("../"))
import src.utilities as u
import src.plots as p
import src.constants as c
c_stopwords = c.custom_stopwords

from src.utilities import TextCleaner
from src.credentials import AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import AzureOpenAIEmbeddings


from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings



### Daten Laden

Die Daten, welche wir aus einer `.csv` Datei laden, welche von einer Website stammen. Mit diesen Daten möchten wir später unsere Fragen beantworten.

#### Datenbeschreibung
- **id** Unique ID des Artikels
- **title** Title des Artikels
- **date** Erscheinungsdatum des Artikels
- **author** Name des Autors
- **content** Inhalt des Artikels
- **domain** von wo der Artikel kommt
- **url** Webadresse (wie man den Artikel finden kann)

In [2]:
file_path = "../data_mc1/"
file_name_train = 'cleantech_media_dataset_v3_2024-10-28.csv'
# full_path = os.path.join(file_path, file_name_train)

column_names = ["id", "title", "date", "author", "content", "domain", "url"]
# df_train = pd.read_csv(full_path, delimiter=",", names=column_names, skiprows=1, encoding="utf-8")
df_train = pd.read_csv(file_path+file_name_train, delimiter=",", names=column_names, skiprows=1, encoding="utf-8")

Der Spalte **author ist vollständig fehlend**, nach Ansehen des Artikels ist schnell ersichtlich, dass dieser über Webscraping aus den Artikeln extrahiert werden könnte. Da aber in den Evaluierungsdaten keine Frage bezüglich des Authors vorhanden ist wird diese nun entfernt. Allerdings könnte diese Information Wert liefern im Bezug auf Ähnlichkeitsberechnung von Inhalten der Artikeln, dies könnte später erneut überprüft werden. **Vorerst wird die Spalte author erntfernt.**

In [3]:
df_train = df_train.drop(columns=["author"])

Datumsformat erstellen

In [4]:
# Datentypen korrigieren
df_train["date"] = pd.to_datetime(df_train["date"], errors="coerce")  # Falls ungültige Datumswerte vorhanden
# df_train["title"] = df_train["title"].astype(str)
# df_train["content"] = df_train["content"].astype(str)

In [5]:
def fake_list_to_text(text):
    import re
    if not isinstance(text, str):
        return text
    if text.count("',") >= 1 and "'" in text:
        parts = re.split(r"\s*,\s*", text.strip("[]"))
        parts = [p.strip(" '\"\n") for p in parts if p.strip()]
        return " ".join(parts)
    return text

In [6]:
df_train["content"] = df_train["content"].apply(fake_list_to_text)
df_train.head()

Unnamed: 0,id,title,date,content,domain,url
0,93320,"XPeng Delivered ~100,000 Vehicles In 2021",2022-01-02,Chinese automotive startup XPeng has shown one...,cleantechnica,https://cleantechnica.com/2022/01/02/xpeng-del...
1,93321,Green Hydrogen: Drop In Bucket Or Big Splash?,2022-01-02,Sinopec has laid plans to build the largest gr...,cleantechnica,https://cleantechnica.com/2022/01/02/its-a-gre...
2,98159,World’ s largest floating PV plant goes online...,2022-01-03,Huaneng Power International has switched on a ...,pv-magazine,https://www.pv-magazine.com/2022/01/03/worlds-...
3,98158,Iran wants to deploy 10 GW of renewables over ...,2022-01-03,According to the Iranian authorities there are...,pv-magazine,https://www.pv-magazine.com/2022/01/03/iran-wa...
4,31128,Eastern Interconnection Power Grid Said ‘ Bein...,2022-01-03,Sign in to get the best natural gas news and d...,naturalgasintel,https://www.naturalgasintel.com/eastern-interc...


In [7]:
# id_list = [42253,
# 22248,
# 105173,
# 31316,
# 43019,
# 31297,
# 31273,
# 31163,
# 31158,
# 31140,
# 126129,
# 6941]
# df_train = df_train[df_train["id"].isin(id_list)]

In [8]:
# df_train

In [9]:
# cleaner = TextCleaner(level="safe")
# data_cleaned = cleaner.clean_text_column(df_train, column="content", new_column="content_cleaned")


In [10]:
cleaner = TextCleaner(level="risky")
data_cleaned = cleaner.clean_text_column(df_train, column="content", new_column="content_cleaned")

In [11]:
# cleaner = TextCleaner(level="dangerous")
# data_cleaned = cleaner.clean_text_column(df_train, column="content", new_column="content_cleaned")

In [12]:
data_cleaned_compare = data_cleaned[["content", "content_cleaned"]].copy()
u.styled_text(data_cleaned_compare.head(), max_width=500)

Unnamed: 0,content,content_cleaned
0,Chinese automotive startup XPeng has shown one of the most dramatic auto production ramp-ups in hist ory and the good news is it only produces 100% sma rt electric vehicles ( EVs). At a mere 7 years of age and just a few years after launching its first ...,Chinese automotive startup XPeng has shown one of the most dramatic auto production ramp ups in hist ory and the good news is it only produces 100 smar t electric vehicles EVs. At a mere 7 years of age  and just a few years after launching its first ve ...
1,Sinopec has laid plans to build the largest green hydrogen production facility in the world but inte rest in fossil-sourced hydrogen continues apace. S kepticism abounds over the green hydrogen field an d yet leading players on the global energy stage s ...,Sinopec has laid plans to build the largest green hydrogen production facility in the world but inte rest in fossil sourced hydrogen continues apace. S kepticism abounds over the green hydrogen field an d yet leading players on the global energy stage s ...
2,Huaneng Power International has switched on a 320 MW floating PV array in China’ s Shandong province . It deployed the plant in two phases on a reservo ir near its 2.65 GW Dezhou thermal power station. Huaneng Power International ( HPI) has completed t ...,Huaneng Power International has switched on a 320 MW floating PV array in China' s Shandong province . It deployed the plant in two phases on a reservo ir near its 2.65 GW Dezhou thermal power station. Huaneng Power International HPI has completed the ...
3,According to the Iranian authorities there are cur rently more than 80GW of renewable energy projects  that were submitted by private investors for revi ew. Iran had around 414MW of installed solar power  at the end of 2020. The Iranian Energy Ministry a ...,According to the Iranian authorities there are cur rently more than 80GW of renewable energy projects  that were submitted by private investors for revi ew. Iran had around 414MW of installed solar power  at the end of 2020. The Iranian Energy Ministry a ...
4,Sign in to get the best natural gas news and data.  Follow the topics you want and receive the daily emails. Your email address * Your password * Remem ber me Continue Reset password Featured Content Ne ws & Data Services Client Support Daily GPI Infras ...,"Follow the topics you want and However the plann ers known as the Eastern Interconnection Planning Collaborative EIPC added a cautionary note. EIPC'  s Zach Smith executive committee chairman said pl anners concluded that "" the Eastern Interconnectio ..."


In [13]:
u.print_full_text(data_cleaned.head(1))

Unnamed: 0,id,title,date,content,domain,url,content_cleaned
0,93320,"XPeng Delivered ~100,000 Vehicles In 2021",2022-01-02 00:00:00,Chinese automotive startup XPeng has shown one of the most dramatic auto production ramp-ups in history and the good news is it only produces 100% smart electric vehicles ( EVs). At a mere 7 years of age and just a few years after launching its first vehicle ( the XPeng G3 went on sales in December 2018) XPeng has wrapped up a year of sales totaling almost 100 000. In 2021 XPeng delivered 98 155 smart EVs a 263% year-over-year increase. In December 16 000 vehicles were delivered a 181% year-over-year increase compared to December 2020. There were 41 751 vehicles delivered in Q4 2021 which was a 222% year-over-year increase. Reinforcing how impressive the 98 155 delivery figure for 2021 is at the end of the year XPeng reached just 137 953 cumulative deliveries. All the more impressive XPeng’ s monthly delivery target is 15 000 vehicles and December was the second month in a row that was surpassed — and that’ s despite the global automotive chip shortage and other supply chain challenges. In the case of the P7 deliveries were up 102%. GS deliveries were up 75%. The P5 was not on the market one year ago but its sales did increase 134% month over month. XPeng also noted that its broader charging and sales network in China has been growing “ rapidly. ” It had “ 661 branded supercharging stations across 228 cities and 311 physical retail stores in operation across 121 cities as of the end of November 2021. ” All in all XPeng’ s growth seems to be rolling out as the company hoped and planned or even better. It’ s quite an uplifting sight to see another pure EV company scaling up production rapidly and achieving great sales success at a young age. Of course the reference company need not be named and we need not compare the two or pretend XPeng should exactly follow Tesla’ s ( whoops) path. However the point is that we are getting more examples of pure EV companies achieving great successes. Zach is tryin ' to help society help itself one word at a time. He spends most of his time here on CleanTechnica as its director chief editor and CEO. Zach is recognized globally as an electric vehicle solar energy and energy storage expert. He has presented about cleantech at conferences in India the UAE Ukraine Poland Germany the Netherlands the USA Canada and Curaçao. Zach has long-term investments in Tesla [ TSLA ] NIO [ NIO ] Xpeng [ XPEV ] Ford [ F ] ChargePoint [ CHPT ] Amazon [ AMZN ] Piedmont Lithium [ PLL ] Lithium Americas [ LAC ] Albemarle Corporation [ ALB ] Nouveau Monde Graphite [ NMGRF ] Talon Metals [ TLOFF ] Arclight Clean Transition Corp [ ACTC ] and Starbucks [ SBUX ]. But he does not offer ( explicitly or implicitly) investment advice of any sort. Advertise with CleanTechnica to get your company in front of millions of monthly readers. Rethink Energy has made the prediction that some countries’ entire automotive fleets will be electric within the next decade. Policy changes in various countries... Tesla hits the UK with its strongest ever May helping lift the plugin electric vehicle market share to 23.1% up from 18.3% year on... The auto market in Germany saw plugin EVs take 22.9% share in May 2023 down from 25.3% year on year. Full electrics gained share ... Recently we did a story about the 2023 NADA convention and goat roping in Dallas where Slate correspondent Alexander Sammon got a chance to... Copyright © 2023 CleanTechnica. The content produced by this site is for entertainment purposes only. Opinions and comments published on this site may not be sanctioned by and do not necessarily represent the views of CleanTechnica its owners sponsors affiliates or subsidiaries.,cleantechnica,https://cleantechnica.com/2022/01/02/xpeng-delivered-100000-vehicles-in-2021/,"Chinese automotive startup XPeng has shown one of the most dramatic auto production ramp ups in history and the good news is it only produces 100 smart electric vehicles EVs. At a mere 7 years of age and just a few years after launching its first vehicle the XPeng G3 went on sales in December 2018 XPeng has wrapped up a year of sales totaling almost 100 000. In 2021 XPeng delivered 98 155 smart EVs a 263 year over year increase. In December 16 000 vehicles were delivered a 181 year over year increase compared to December 2020. There were 41 751 vehicles delivered in Q4 2021 which was a 222 year over year increase. Reinforcing how impressive the 98 155 delivery figure for 2021 is at the end of the year XPeng reached just 137 953 cumulative deliveries. All the more impressive XPeng' s monthly delivery target is 15 000 vehicles and December was the second month in a row that was surpassed and that' s despite the global automotive chip shortage and other supply chain challenges. In the case of the P7 deliveries were up 102. GS deliveries were up 75. The P5 was not on the market one year ago but its sales did increase 134 month over month. XPeng also noted that its broader charging and sales network in China has been growing "" rapidly. "" It had "" 661 branded supercharging stations across 228 cities and 311 physical retail stores in operation across 121 cities as of the end of November 2021. "" All in all XPeng' s growth seems to be rolling out as the company hoped and planned or even better. It' s quite an uplifting sight to see another pure EV company scaling up production rapidly and achieving great sales success at a young age. Of course the reference company need not be named and we need not compare the two or pretend XPeng should exactly follow Tesla' s whoops path. However the point is that we are getting more examples of pure EV companies achieving great successes. Zach is tryin ' to help society help itself one word at a time. He spends most of his time here on CleanTechnica as its director chief editor and CEO. Zach is recognized globally as an electric vehicle solar energy and energy storage expert. He has presented about cleantech at conferences in India the UAE Ukraine Poland Germany the Netherlands the USA Canada and Curaao. Zach has long term investments in Tesla TSLA NIO NIO Xpeng XPEV Ford F ChargePoint CHPT Amazon AMZN Piedmont Lithium PLL Lithium Americas LAC Albemarle Corporation ALB Nouveau Monde Graphite NMGRF Talon Metals TLOFF Arclight Clean Transition Corp ACTC and Starbucks SBUX . But he does not offer explicitly or implicitly investment advice of any sort. Rethink Energy has made the prediction that some countries' entire automotive fleets will be electric within the next decade. Policy changes in various countries... Tesla hits the UK with its strongest ever May helping lift the plugin electric vehicle market share to 23.1 up from 18.3 year on... The auto market in Germany saw plugin EVs take 22.9 share in May 2023 down from 25.3 year on year. Full electrics gained share ... Recently we did a story about the 2023 NADA convention and goat roping in Dallas where Slate correspondent Alexander Sammon got a chance to... ."


In [14]:
# data_cleaned = u.clean_url_column(data_cleaned, 'url')

In [15]:
u.print_full_text(df = data_cleaned[data_cleaned['id'] == 105173])


Unnamed: 0,id,title,date,content,domain,url,content_cleaned
3497,105173,Global energy storage market 'growing pains ' won't stunt 15-fold boom by 2030: BNEF,2022-10-14 00:00:00,Global energy storage markets will together grow 15-fold to 411GW ( 1.19TWh) by the end of the decade boosted by recent policy shifts in the US and Europe although supply chain constraints might slow additions according to the latest forecast from BloombergNEF ( BNEF). The research house sees 13% more capacity – 46GW ( 145GWh) – than previously estimated being built given extra drive by the US Inflation Reduction Act ( IRA) which has earmarked $ 369bn for clean technologies and EU’ s REPowerEU plan which has raised ambitions to cut reliance on gas from Russia. BNEF’ s 2H 2022 Energy Storage Market Outlook estimates roughly 30GW ( 111GWh) of energy storage build through to 2030 although supply chain bottlenecks “ cloud deployment expectations until 2024 ”. In the utility-scale sector significant storage additions expected from 2025 onwards align with raised renewable targets outlined in the REPowerEU plan and a renewed focus on energy security in the UK according to BNEF which has more than doubled its estimates for deployments in the second half of this decade across Europe. In Europe Russia’ s invasion of Ukraine has impacted on energy storage developments with record electricity prices forcing consumers to consider new forms of energy supply supporting near-term growth in the residential segment. While scale-up of global energy storage capacity is imminent BNEF cautions that supply chain hurdles remain an impediment for the industry. On top of pandemic-related issues inflation high transport costs and raw material prices have made battery cells more expensive over the last year. Meanwhile projects face long lead times to finance develop and commission. So far in 2022 supply chain disruptions have resulted in lower utility-scale storage additions said BNEF noting that while a lot of these pressures would ikely ease next year scaling up for a vastly larger market in 2030 “ will certainly come with challenges ”. “ The energy storage industry is facing growing pains. Yet despite higher battery system prices demand is clear ” said Helen Kou lead author of the report and an energy storage associate at BNEF. The US and China are set to remain the two largest energy storage markets through 2030 representing more than half of global installations. “ Europe however is catching up with a significant ramp-up in capacity fuelled by the current energy crisis ” said Kou. Regionally Asia Pacific will lead storage build on a megawatt-basis by 2030 with momentum underpinned by the rapidly scaling market in China. The Americas will add more capacity on a megawatt-hour basis as storage plants in the US usually have more hours of storage. Africa Europe even with additional upside from recent policy advances and the Middle East are all set to lag Asia Pacific and the Americas according to BNEF. The firm’ s forecast suggests the majority of energy storage build by 2030 equivalent to 61% of total megawattage will be to provide so-called energy shifting – advancing or delaying the time of electricity dispatch. Co-located renewables-plus-storage projects in particular solar-plus-storage are becoming commonplace globally. BNEF noted that rapidly evolving battery technology is also driving the energy storage market. Lithium-ion batteries presently account for the majority of installations but many non-battery technologies are under development such as compressed air and thermal energy storage. Still it expects that batteries will dominate the market at least until the 2030s in large part due to their price competitiveness established supply chain and significant track record. “ If new technologies can successfully outcompete lithium-ion then total energy storage uptake may well be larger ” said Kou. Recharge is part of DN Media Group. To read more about DN Media Group click here Recharge is part of DN Media Group AS. From November 1st DN Media Group is responsible for controlling your data on Recharge. We use your data to ensure you have a secure and enjoyable user experience when visiting our site. You can read more about how we handle your information in our privacy policy. DN Media Group is the leading news provider in the shipping seafood and energy industries with a number of English- and Norwegian-language news publications across a variety of sectors. Read more about DN Media Group here. Recharge is part of NHST Global Publications AS and we are responsible for the data that you register with us and the data we collect when you visit our websites. We use cookies in a variety of ways to improve your experience such as keeping NHST websites reliable and secure personalising content and ads and to analyse how our sites are being used. For more information and how to manage your privacy settings please refer to our privacy and cookie policies.,rechargenews,https://www.rechargenews.com/markets/global-energy-storage-market-growing-pains-wont-stunt-15-fold-boom-by-2030-bnef/2-1-1334329,Global energy storage


In [16]:
file_name_test = 'cleantech_rag_evaluation_data_2024-09-20.csv'
# full_path = os.path.join(file_path, file_name_train)

column_names = ["example_id", "question_id", "question", "relevant_text", "answer", "article_url"]
# df_train = pd.read_csv(full_path, delimiter=",", names=column_names, skiprows=1, encoding="utf-8")
# df_train = pd.read_csv(file_path+file_name_test, delimiter=",", names=column_names, skiprows=1, encoding="utf-8")
df_test = pd.read_csv(file_path + file_name_test, delimiter=";", names=column_names, skiprows=1, encoding="utf-8")


In [17]:
# u.print_full_text(df_test)

In [18]:
df_pv_magazine = df_train[df_train['url'] == 'https://www.pv-magazine.com/2023/04/08/high-time-for-solar/#comments']

In [19]:
u.print_full_text(df_pv_magazine)

Unnamed: 0,id,title,date,content,domain,url,content_cleaned
6283,98906,Weekend read: High time for solar – pv magazine International,2023-04-08 00:00:00,Cannabis prohibition drove a culture of clandestine production in the past and solar helped growers to cultivate it at remote off-grid sites. Now as a global commercial cannabis market emerges solar has an even bigger role to play. Legal cannabis cultivation could be a big opportunity for solar. Solar energy and cannabis cultivation are old bedfellows. PV pioneer John Schaeffer has even credited solar with facilitating the northern California cannabis industry which in turn supported the nascent PV sector. Now as the legalization of medical and recreational cannabis gathers pace solar continues to perform a key role. Cannabis cultivation can take place outdoors indoors or in greenhouses. While outdoor cultivation worked for millennia the growth of the industry and increased demand for higher-quality product – plus tight profit margins – have prompted producers to go indoors where ideal environments can be replicated. Indoor growth gives control over environmental factors and flowering periods. More importantly it means consistent year-round harvests. However the high-powered lights and the heating ventilation and air conditioning equipment needed to control temperature and humidity come with a Sasquatch-sized carbon footprint. A 2022 report from the United Nations Office of Drugs and Crime ( UNODC) estimated climate control measures represent more than 80% of the carbon footprint of indoor cannabis production. UNODC estimated that the carbon footprint is 16 times to 100 times larger than for outdoor cultivation. Indoor “ factory farming ” is incompatible with environmental social and corporate governance ( ESG) standards says Evan Mills principal at engineering consultancy Energy Associates and a former senior scientist at the Lawrence Berkeley National Laboratory. Mills spelled out the problem in a paper published in “ Energy Policy ” in 2012 the year Colorado and Washington state legalized cannabis and spurred a domino effect elsewhere. Even then he estimated indoor cultivation accounted for 1% of total US electricity use for a carbon footprint equivalent to three million cars. The scientist estimated 42% of cultivators grew exclusively indoors by 2020 often in multiple sprawling Walmart-scale factories with energy consumption comparable to data centers. Unfortunately installing large scale solar on site isn’ t an easy solution. According to Mills’ recent research into a proposed indoor cannabis industrial park in Blythe California the amount of solar needed to achieve net zero energy supply would cover 1 400 acres – far more land than would be necessary if the cannabis were grown outdoors. While entirely valid Mills’ research suggests an erroneous equivalency between indoor- and outdoor-grown cannabis. After all anyone can brew up a big batch of moonshine in a bathtub but it’ s not so easy to produce a fine single-malt Scotch at scale. Indoor cannabis is a premium product and in a state like California where people can still easily access a black market at least double the size of the regulated ( and taxable) industry commercial cannabis companies are incentivized to produce premium products only cultivable in a climate-controlled environment. Large-scale outdoor farming is also fraught with risk. In October or “ Croptober ” as it is known all of California’ s outdoor-grown cannabis is harvested. This single harvest if it has not been spoiled by environmental impacts such as wildfires is only attractive for consumers for several months. If the crop fails or people decide to use cannabis in the other nine months of the year well they’ ll turn to an indoor site or greenhouse. Despite the alarming energy intensity of indoor cultivation the sheer roofspace of grow houses is an ideal solar platform. California-based Canndescent boasts more than 100 000 square feet of indoor cultivation space and annually produces almost 17 tons of cannabis. In 2019 Canndescent installed a 282.5 kW solar system at its facility in Desert Hot Springs. Canndescent Senior Director of Compliance Andrew Mochulsky tells pv magazine the Colorado Desert’ s unrelenting sunshine and limited cloud cover make solar a no-brainer. “ We’ re in the heart of solar and wind country so it made sense to bring solar online ” he says. “ We also think it’ s just the right thing to do. ” Indoor cultivation Mochulsky says is “ not trying to replace the sun but improve upon it to create rigid 12-hour midday sun conditions which are physically impossible outside. Because of that you have to control the environment so it’ s very thirsty for electricity. ” Canndescent’ s solar offsets 25% to 35% of the company’ s electricity consumption according to Mochulsky depending on the time of year. “ It’ s a great investment ” he adds. “ And if we can get closer to a 1:1 ratio of canopy space it would have a substantial impact on our power costs. ” The inclusion of solar shades and carports also provides a “ quality of life benefit ” for employees Mochulsky says. “ So people have a nice shaded place to sit. ” Despite such benefits only a small minority of North American indoor growers have gone solar. “ We are an outlier ” says Mochulsky. “ I can not say that it is the norm. Even here in Desert Hot Springs we are the only one and that is in sharp contrast to the residential market where every rooftop has solar on it. Solar makes abundant sense but it entails a cost upfront. ” The Canndescent director says tight cannabis margins are a factor. “ Even with an ROI [ return on investment for solar ] of five years the market is soft [ costs ] are low – except to operate – and the liquidity is very tight. ” While cannabis legalization in the US is progressing state by state and president Joe Biden has directed his secretary of health and human services and the attorney general to review cannabis scheduling under federal law the market remains in legal limbo at national level making financing solar difficult. “ There are a lot of prevailing headwinds for cannabis to adopt more solar ” says Mochulsky. “ Access to a lot of financial instruments is just not available. We can not get a standard loan from the standard banks. We can’ t for example mortgage a company. Cannabis companies don’ t have availability to things like bankruptcy. Federal illegality also means we don’ t have access to state and federal tax-credit programs [ such as the Inflation Reduction Act ]. Investors are much more attuned to that degree of risk. After all if a company can’ t restructure it has to fail. ” There are more examples of solar adoption however. Much further north Freedom Cannabis tapped AltaPro Electric to design and install a 1.83 MW system atop its facility in Acheson Alberta in 2020. “ It’ s Canada’ s largest operational rooftop PV installation ” says AltaPro Chief Financial Officer David DeBruin. Freedom Cannabis is reaping the rewards from betting on solar. “ The margins in the indoor grow operations are very competitive and chopping down one of the largest opex [ operating expenditure ] items is a great way that Freedom Cannabis has been able to be a leader in the industry ” DeBruin tells pv magazine. He says other cannabis companies have made inquiries about solar but as in the US “ it seems the clients are held back due to financing becoming harder to attain from lenders. It’ s too bad because the cost of borrowing is dwarfed by the savings many times making any loan to install solar cash flow positive in year one. ” Cannabis is not limited to medical and recreational use. Hemp is one of the most versatile crops on the planet – and it’ s making a big comeback. As hemp is not grown for psychoactive component tetrahydrocannabinol ( THC) its farming is far less intensive and finicky than for example the high-THC flower grown by Canndescent in California. This makes hemp a potentially ideal agrivoltaic crop. In Melz Germany agrivoltaic developer SunFarming is currently trialing the cultivation of hemp under solar panels. “ All the plants without exception have grown well and developed excellently ” says Rafael Dulon founder and managing director of Hanf Farm. Dulon says the panels also help with mold a key concern for hemp farmers. Mold becomes a problem in autumn as temperatures cool and precipitation on the plants fails to dry. “ The rain protection for the plants provided by the PV system works wonderfully ” Dulon says. Another less-energy-intensive growing option is a greenhouse. However the need for natural sunlight makes rooftop solar less attractive on such structures. Building-integrated photovoltaics ( BIPV) are improving rapidly but products such as solar facades for greenhouses may be years away. In the meantime US-based nanomaterials innovator UbiQD’ s agricultural arm UbiGro – and solar panel manufacturer Heliene – have entered a joint development agreement for light-optimizing energy-producing modules designed for agrivoltaic greenhouses. UbiGro’ s translucent greenhouse film is integrated with photoluminescent particles that convert light into a preferable wavelength and can easily be combined with a solar module. UbiQD founder and CEO Hunter McDaniel tells pv magazine that if the panels only partially shade the greenhouse the yield lost to shading can be made up by the spectrum improvement provided by the film. McDaniel adds that while indoor growers have thus far managed to keep the scope of their energy intensity quiet – as opposed to similarly emergent energy-intensive industries such as crypto-mining – indoor growth is likely to shrink. Scientist Mills’ research supports this idea as the number of growers who operate primarily indoors fell from 80% to 60% between 2016 and 2020. With outdoor growth too inconsistent and indoor growth shrinking the hybrid option of greenhouses is likely to proliferate in the future. The cannabis industry trend of solar uptake is continuing in US states where the plant has been legalized more recently such as New York. Nate VerHague market development manager at New York-based installer Solar Liberty tells pv magazine the new cannabis market “ is going to have a ton of potential for solar. ” VerHague notes cannabis operations in New York state ( NYS) are only just getting up and running. “ There is going to be some extreme utility costs when it comes to electricity for these large operations ” he says. “ It is an ideal customer for solar as these facilities typically have a lot of roof space to utilize. ” While the nascent nature of the NYS market means Solar Liberty is yet to install PV for a cannabis company VerHague confirms the installer is “ in the quoting process with some organizations ” with the market looking promising from this year onward. National and regional governments the world over are shaping to follow in the footsteps of the US Canada Mexico Thailand Uruguay and others. Germany could legalize cannabis completely within a few years a development which would not only make it the world’ s single largest legal market but also considering its European centrality the world’ s “ dankest ” domino as neighbors would be expected to follow. Cannabis is becoming big business and given its energy needs could be big business for solar too. This content is protected by copyright and may not be reused. If you want to cooperate with us and would like to reuse some of our content please contact: editors @ pv-magazine.com. Please be mindful of our community standards. Your email address will not be published. Required fields are marked * Save my name email and website in this browser for the next time I comment. By submitting this form you agree to pv magazine using your data for the purposes of publishing your comment. Your personal data will only be disclosed or otherwise transmitted to third parties for the purposes of spam filtering or if this is necessary for technical maintenance of the website. Any other transfer to third parties will not take place unless this is justified on the basis of applicable data protection regulations or if pv magazine is legally obliged to do so. You may revoke this consent at any time with effect for the future in which case your personal data will be deleted immediately. Otherwise your data will be deleted if pv magazine has processed your request or the purpose of data storage is fulfilled. Further information on data privacy can be found in our Data Protection Policy. This website uses cookies to anonymously count visitor numbers. View our privacy policy. × The cookie settings on this website are set to `` allow cookies '' to give you the best browsing experience possible. If you continue to use this website without changing your cookie settings or you click `` Accept '' below then you are consenting to this.,pv-magazine,https://www.pv-magazine.com/2023/04/08/high-time-for-solar/#comments,"Cannabis prohibition drove a culture of clandestine production in the past and solar helped growers to cultivate it at remote off grid sites. Now as a global commercial cannabis market emerges solar has an even bigger role to play. Legal cannabis cultivation could be a big opportunity for solar. Solar energy and cannabis cultivation are old bedfellows. PV pioneer John Schaeffer has even credited solar with facilitating the northern California cannabis industry which in turn supported the nascent PV sector. Now as the legalization of medical and recreational cannabis gathers pace solar continues to perform a key role. Cannabis cultivation can take place outdoors indoors or in greenhouses. While outdoor cultivation worked for millennia the growth of the industry and increased demand for higher quality product plus tight profit margins have prompted producers to go indoors where ideal environments can be replicated. Indoor growth gives control over environmental factors and flowering periods. More importantly it means consistent year round harvests. However the high powered lights and the heating ventilation and air conditioning equipment needed to control temperature and humidity come with a Sasquatch sized carbon footprint. A 2022 report from the United Nations Office of Drugs and Crime UNODC estimated climate control measures represent more than 80 of the carbon footprint of indoor cannabis production. UNODC estimated that the carbon footprint is 16 times to 100 times larger than for outdoor cultivation. Indoor "" factory farming "" is incompatible with environmental social and corporate governance ESG standards says Evan Mills principal at engineering consultancy Energy Associates and a former senior scientist at the Lawrence Berkeley National Laboratory. Mills spelled out the problem in a paper published in "" Energy Policy "" in 2012 the year Colorado and Washington state legalized cannabis and spurred a domino effect elsewhere. Even then he estimated indoor cultivation accounted for 1 of total US electricity use for a carbon footprint equivalent to three million cars. The scientist estimated 42 of cultivators grew exclusively indoors by 2020 often in multiple sprawling Walmart scale factories with energy consumption comparable to data centers. Unfortunately installing large scale solar on site isn' t an easy solution. According to Mills' recent research into a proposed indoor cannabis industrial park in Blythe California the amount of solar needed to achieve net zero energy supply would cover 1 400 acres far more land than would be necessary if the cannabis were grown outdoors. While entirely valid Mills' research suggests an erroneous equivalency between indoor and outdoor grown cannabis. After all anyone can brew up a big batch of moonshine in a bathtub but it' s not so easy to produce a fine single malt Scotch at scale. Indoor cannabis is a premium product and in a state like California where people can still easily access a black market at least double the size of the regulated and taxable industry commercial cannabis companies are incentivized to produce premium products only cultivable in a climate controlled environment. Large scale outdoor farming is also fraught with risk. In October or "" Croptober "" as it is known all of California' s outdoor grown cannabis is harvested. This single harvest if it has not been spoiled by environmental impacts such as wildfires is only attractive for consumers for several months. If the crop fails or people decide to use cannabis in the other nine months of the year well they' ll turn to an indoor site or greenhouse. Despite the alarming energy intensity of indoor cultivation the sheer roofspace of grow houses is an ideal solar platform. California based Canndescent boasts more than 100 000 square feet of indoor cultivation space and annually produces almost 17 tons of cannabis. In 2019 Canndescent installed a 282.5 kW solar system at its facility in Desert Hot Springs. Canndescent Senior Director of Compliance Andrew Mochulsky tells pv magazine the Colorado Desert' s unrelenting sunshine and limited cloud cover make solar a no brainer. "" We' re in the heart of solar and wind country so it made sense to bring solar online "" he says. "" We also think it' s just the right thing to do. "" Indoor cultivation Mochulsky says is "" not trying to replace the sun but improve upon it to create rigid 12 hour midday sun conditions which are physically impossible outside. Because of that you have to control the environment so it' s very thirsty for electricity. "" Canndescent' s solar offsets 25 to 35 of the company' s electricity consumption according to Mochulsky depending on the time of year. "" It' s a great investment "" he adds. "" And if we can get closer to a 11 ratio of canopy space it would have a substantial impact on our power costs. "" The inclusion of solar shades and carports also provides a "" quality of life benefit "" for employees Mochulsky says. "" So people have a nice shaded place to sit. "" Despite such benefits only a small minority of North American indoor growers have gone solar. "" We are an outlier "" says Mochulsky. "" I can not say that it is the norm. Even here in Desert Hot Springs we are the only one and that is in sharp contrast to the residential market where every rooftop has solar on it. Solar makes abundant sense but it entails a cost upfront. "" The Canndescent director says tight cannabis margins are a factor. "" Even with an ROI return on investment for solar of five years the market is soft costs are low except to operate and the liquidity is very tight. "" While cannabis legalization in the US is progressing state by state and president Joe Biden has directed his secretary of health and human services and the attorney general to review cannabis scheduling under federal law the market remains in legal limbo at national level making financing solar difficult. "" There are a lot of prevailing headwinds for cannabis to adopt more solar "" says Mochulsky. "" Access to a lot of financial instruments is just not available. We can not get a standard loan from the standard banks. We can' t for example mortgage a company. Cannabis companies don' t have availability to things like bankruptcy. Federal illegality also means we don' t have access to state and federal tax credit programs such as the Inflation Reduction Act . Investors are much more attuned to that degree of risk. After all if a company can' t restructure it has to fail. "" There are more examples of solar adoption however. Much further north Freedom Cannabis tapped AltaPro Electric to design and install a 1.83 MW system atop its facility in Acheson Alberta in 2020. "" It' s Canada' s largest operational rooftop PV installation "" says AltaPro Chief Financial Officer David DeBruin. Freedom Cannabis is reaping the rewards from betting on solar. "" The margins in the indoor grow operations are very competitive and chopping down one of the largest opex operating expenditure items is a great way that Freedom Cannabis has been able to be a leader in the industry "" DeBruin tells pv magazine. He says other cannabis companies have made inquiries about solar but as in the US "" it seems the clients are held back due to financing becoming harder to attain from lenders. It' s too bad because the cost of borrowing is dwarfed by the savings many times making any loan to install solar cash flow positive in year one. "" Cannabis is not limited to medical and recreational use. Hemp is one of the most versatile crops on the planet and it' s making a big comeback. As hemp is not grown for psychoactive component tetrahydrocannabinol THC its farming is far less intensive and finicky than for example the high THC flower grown by Canndescent in California. This makes hemp a potentially ideal agrivoltaic crop. In Melz Germany agrivoltaic developer SunFarming is currently trialing the cultivation of hemp under solar panels. "" All the plants without exception have grown well and developed excellently "" says Rafael Dulon founder and managing director of Hanf Farm. Dulon says the panels also help with mold a key concern for hemp farmers. Mold becomes a problem in autumn as temperatures cool and precipitation on the plants fails to dry. "" The rain protection for the plants provided by the PV system works wonderfully "" Dulon says. Another less energy intensive growing option is a greenhouse. However the need for natural sunlight makes rooftop solar less attractive on such structures. Building integrated photovoltaics BIPV are improving rapidly but products such as solar facades for greenhouses may be years away. In the meantime US based nanomaterials innovator UbiQD' s agricultural arm UbiGro and solar panel manufacturer Heliene have entered a joint development agreement for light optimizing energy producing modules designed for agrivoltaic greenhouses. UbiGro' s translucent greenhouse film is integrated with photoluminescent particles that convert light into a preferable wavelength and can easily be combined with a solar module. UbiQD founder and CEO Hunter McDaniel tells pv magazine that if the panels only partially shade the greenhouse the yield lost to shading can be made up by the spectrum improvement provided by the film. McDaniel adds that while indoor growers have thus far managed to keep the scope of their energy intensity quiet as opposed to similarly emergent energy intensive industries such as crypto mining indoor growth is likely to shrink. Scientist Mills' research supports this idea as the number of growers who operate primarily indoors fell from 80 to 60 between 2016 and 2020. With outdoor growth too inconsistent and indoor growth shrinking the hybrid option of greenhouses is likely to proliferate in the future. The cannabis industry trend of solar uptake is continuing in US states where the plant has been legalized more recently such as New York. Nate VerHague market development manager at New York based installer Solar Liberty tells pv magazine the new cannabis market "" is going to have a ton of potential for solar. "" VerHague notes cannabis operations in New York state NYS are only just getting up and running. "" There is going to be some extreme utility costs when it comes to electricity for these large operations "" he says. "" It is an ideal customer for solar as these facilities typically have a lot of roof space to utilize. "" While the nascent nature of the NYS market means Solar Liberty is yet to install PV for a cannabis company VerHague confirms the installer is "" in the quoting process with some organizations "" with the market looking promising from this year onward. National and regional governments the world over are shaping to follow in the footsteps of the US Canada Mexico Thailand Uruguay and others. Germany could legalize cannabis completely within a few years a development which would not only make it the world' s single largest legal market but also considering its European centrality the world' s "" dankest "" domino as neighbors would be expected to follow. Cannabis is becoming big business and given its energy needs could be big business for solar too."


In [20]:
def check_target_url_in_train_df(target_df: pd.DataFrame, train_df: pd.DataFrame) -> pd.DataFrame:
    url_in_train_df = set(train_df['url'])
    target_df['url_in_train_df'] = target_df['article_url'].isin(url_in_train_df)
    return target_df

In [21]:
url_check = check_target_url_in_train_df(df_test, data_cleaned)
url_check = url_check[url_check['url_in_train_df'] == False]
u.print_full_text(url_check)

Unnamed: 0,example_id,question_id,question,relevant_text,answer,article_url,url_in_train_df
0,1,1,What is the innovation behind Leclanché's new method to produce lithium-ion batteries?,"Leclanché said it has developed an environmentally friendly way to produce lithium-ion (Li-ion) batteries. It has replaced highly toxic organic solvents, commonly used in the production process, with a water-based process to make nickel-manganese-cobalt-aluminium cathodes (NMCA).",Leclanché's innovation is using a water-based process instead of highly toxic organic solvents to produce nickel-manganese-cobalt-aluminium cathodes for lithium-ion batteries.,https://www.sgvoice.net/strategy/technology/23971/leclanches-new-disruptive-battery-boosts-energy-density/,False
1,2,2,What is the EU’s Green Deal Industrial Plan?,"The Green Deal Industrial Plan is a bid by the EU to make its net zero industry more competitive and to accelerate its transition to net zero. It intends to support the expansion of European manufacturing of technologies, goods and services needed to achieve its climate targets.","The EU’s Green Deal Industrial Plan aims to enhance the competitiveness of its net zero industry and accelerate the transition to net zero by supporting the expansion of European manufacturing of technologies, goods, and services necessary to meet climate targets.",https://www.sgvoice.net/policy/25396/eu-seeks-competitive-boost-with-green-deal-industrial-plan/,False
3,4,3,What are the four focus areas of the EU's Green Deal Industrial Plan?,"The new plan is fundamentally focused on four areas, or pillars: the regulatory environment, access to finance, enhancing skills, and improving supply chain resilience. It also builds on other initiatives, such as REPowerEU, as well as the strength of the single market.","The four focus areas of the EU's Green Deal Industrial Plan are the regulatory environment, access to finance, enhancing skills, and improving supply chain resilience.",https://www.sgvoice.net/policy/25396/eu-seeks-competitive-boost-with-green-deal-industrial-plan/,False
22,23,21,Which has the higher absorption coefficient for wavelengths above 500m - amorphous germanium or amorphous silicon?,We chose amorphous germanium instead of amorphous silicon as absorber material because of its higher absorption coefficient for wavelengths above 500 nm.,amorphous germanium,https://www.pv-magazine.com/2021/01/15/germanium-based-solar-cell-tech-for-agrivoltaics/#respond,False


In [22]:
# print(df_train[df_train['url'].str.startswith('https://www.pv-magazine.com/2021')])

In [23]:
# url_check.to_csv(file_path+'data_processed/target_urls_not_in_train_df.csv', index=False, sep=';')

In [24]:
# # save evaluation data without false urls
# url_check = check_target_url_in_train_df(df_test, data_cleaned)
# cleantech_rag_evaluation_data = url_check[url_check['url_in_train_df'] == True]

# cleantech_rag_evaluation_data = cleantech_rag_evaluation_data.drop(columns=['url_in_train_df'])

# # save as parquet file
# cleantech_rag_evaluation_data.to_parquet('../data_mc1/data_processed/cleantech_rag_evaluation_data.parquet', index=False)

# read parquet file
cleantech_rag_evaluation_data = pd.read_parquet('../data_mc1/data_processed/cleantech_rag_evaluation_data.parquet')
cleantech_rag_evaluation_data.head()

Unnamed: 0,example_id,question_id,question,relevant_text,answer,article_url
0,3,2,What is the EU’s Green Deal Industrial Plan?,The European counterpart to the US Inflation R...,The EU’s Green Deal Industrial Plan aims to en...,https://www.pv-magazine.com/2023/02/02/europea...
1,5,4,When did the cooperation between GM and Honda ...,What caught our eye was a new hookup between G...,July 2013,https://cleantechnica.com/2023/05/08/general-m...
2,6,5,Did Colgate-Palmolive enter into PPA agreement...,"Scout Clean Energy, a Colorado-based renewable...",yes,https://solarindustrymag.com/scout-and-colgate...
3,7,6,What is the status of ZeroAvia's hydrogen fuel...,"In December, the US startup ZeroAvia announced...",ZeroAvia's hydrogen fuel cell electric aircraf...,https://cleantechnica.com/2023/01/02/the-wait-...
4,8,7,"What is the ""Danger Season""?",As spring turns to summer and the days warm up...,"The ""Danger Season"" is the period in the North...",https://cleantechnica.com/2023/05/15/what-does...
