!pip3 install bitsandbytes accelerate peft

In [None]:
import gc

import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import numpy as np

# Method 1

In [None]:
# Checkpoint to load from the Hugging Face Hub (huggingface.co).
model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # 32K context window
# model_name = "EleutherAI/gpt-neo-125m"

# bitsandbytes quantization settings: 4-bit NF4 weights, float16 compute dtype.
# See https://huggingface.co/docs/transformers/en/main_classes/quantization
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype="float16"
)

# Load the quantized model; device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)
# The tokenizer comes from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [None]:
# Notebook display: a bare expression at the end of a cell renders the model's repr as output.
model

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Names of the modules that receive LoRA adapters; the right list depends on the checkpoint.
#   "EleutherAI/gpt-neo-125m":            ["k_proj", "v_proj", "q_proj", "out_proj"]
#   "mistralai/Mistral-7B-Instruct-v0.2": the list below
lora_target_modules = [
    "k_proj",
    "v_proj",
    "q_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "lm_head",
]

# LoRA hyper-parameters. See https://huggingface.co/docs/peft/en/package_reference/lora
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="all",  # accepted values: "none", "all", "lora_only"
    target_modules=lora_target_modules,
)

# First prepare the k-bit model for training, then wrap it with the LoRA adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), config)

In [None]:
# Notebook display: show the model's repr again, now that get_peft_model has wrapped it.
model

# Sample transcript (Hess Midstream Partners, Fourth Quarter 2019 earnings conference call)

In [None]:
example_transcript = """Prepared Remarks:
Operator

Good day, ladies and gentlemen, and welcome to the Fourth Quarter 2019 Hess Midstream Partners' Conference Call. My name is Dillon, and I'll be your operator for today. [Operator Instructions]. Later, we will conduct a question-and-answer session. [Operator Instructions]. I would now like to turn the conference over to Jennifer Gordon, Vice President of Investor Relations. Please proceed.

Jennifer Gordon -- Vice President of Investor Relations

Thank you, Dillon. Good afternoon, everyone, and thank you for participating in our fourth quarter earnings conference call. Our earnings release was issued this morning and appears on our website, www.hessmidstream.com. Today's conference call contains projections and other forward-looking statements within the meaning of the federal securities laws.

These statements are subject to known and unknown risks and uncertainties that may cause actual results to differ from those expressed or implied in such statements. These risks include those set forth in the Risk Factor section of Hess Midstream's filings with the SEC. Also on today's conference call, we may discuss certain non-GAAP financial measures. A reconciliation of the differences between these non-GAAP financial measures and the most directly comparable GAAP financial measures can be found in the earnings release. With me today are John Gatling, President and Chief Operating Officer; and Jonathan Stein, Chief Financial Officer.

I'll now turn the call over to John Gatling.

John A. Gatling -- President and Chief Operating Officer

Thanks, Jennifer. Good afternoon, everyone, and welcome to Hess Midstream's fourth quarter 2019 conference call. Today, I'll review our operating performance and highlights as we continue to execute our strategy, provide additional details regarding our 2020 plans and discuss Hess Corporation's latest results and outlook for the Bakken. Jonathan will then review our financial results.

2019 was a year of strong performance and strategic execution for Hess Midstream. We delivered year-on-year double-digit percentage increases in volumes across all of our systems, realized significant EBITDA growth and completed the acquisition of Hess Infrastructure Partners, eliminating IDR payments and converting to an Up-C corporate structure as part of the transaction.

We entered 2020 as a large-scale, full service midstream Company, well positioned for visible adjusted EBITDA growth and increasing free cash flow generation with a platform that provides opportunity for broad investor participation. Also in 2019, we made substantial investments to further expand our strategically positioned infrastructure, including significantly increasing our total gas processing capacity, acquiring Hess' water business and completing a series of key Hess Midstream-led gathering and compression projects, which were all delivered on time and on budget.

In partnership with Targa Resources, we started up the Little Missouri 4 gas plant in mid-year, expanding our base and processing footprint, complementing our full fractionation capability at the Tioga Gas Plant. LM4 increased our total nameplate processing capacity by 100 million cubic feet per day, or an increase of 40% to 350 million cubic feet per day. And our capacity is continuing to grow with the in-progress expansion of TGP, taking Hess Midstream's total Bakken processing capacity to 500 million cubic foot per day.

Complementing our strategic projects, we also continue to expand our gathering systems to accommodate growth from Hess and third-party customers. And late in 2019, put our first Hess Midstream operated saltwater disposal well in service. In addition to our organic growth investments, in early 2019, we partnered with HIP to acquire Summit's Tioga oil, gas and water gathering systems, adding to the acquisitions and JV investments that HESM and HIP have made over the past couple of years.

Our targeted investments to expand our system footprint, combined with consistent and reliable operating performance enabled us to deliver strong volume growth in 2019. In the fourth quarter of 2019, we completed our volume ramp through the LM4 gas plant, albeit at a slower pace than anticipated.

During the quarter, we also progressively backfilled TGP, though, we did experience minor delays integrating some new third-party volumes. Gas processing volumes averaged 308 million cubic foot per day in the quarter or approximately 90% of nameplate capacity, an increase of approximately 20% over the third quarter and 30% over the second quarter, which was prior to the start-up of LM4.

For full-year 2019, gas processing volumes averaged 260 million cubic foot per day, a 12% increase over the prior year. For our crude oil business, fourth quarter 2019 crude terminaling volumes were 148,000 barrels of oil per day, a 14% increase over the third quarter, primarily driven by increasing Hess production, as Hess brought on 59 new wells online in the quarter. For full year 2019, crude terminaling volumes averaged 131,000 barrels of oil per day, a 30% increase over the prior year. Water gathering volumes were 50,000 barrels of water per day in the fourth quarter of 2019, and an 11% increase over the third quarter, driven by Hess' growing production and continued expansion of the water system.

For full-year 2019, water gathering volumes averaged 100 -- averaged 41,000 barrels of water per day, an increase of 64%, increased over prior year. Now turning to Hess upstream highlights. Earlier today, Hess reported fourth quarter 2019 production from the Bakken of 174,000 barrels of oil equivalent per day, an increase of approximately 38% over the year ago quarter. For full-year 2019, Hess net Bakken production averaged 152,000 barrels of oil equivalent per day, reflecting the strong performance of the plug and perf completions and the quality of Hess' acreage position.

For full-year 2020, Hess forecasts Bakken net production to average approximately 180,000 barrels of oil per day or 18% above full-year 2019. In 2020, Hess expects to drill approximately 170 wells and bring online 175 compared to 160 wells drilled and 156 wells brought online in 2019. In the first quarter of 2020, Hess expects net production to average 170,000 barrels of oil equivalent per day, reflecting lower activity levels due to seasonally difficult winter weather conditions. Hess expects to bring online approximately 30 new wells compared to the 59 in the fourth quarter of 2019.

Hess, which has already hedged a substantial portion of its 2020 crude oil production, expects to operate six rigs in the Bakken through 2020 and anticipates net production to increase throughout the year, approaching 200,000 barrels of oil equivalent per day by the end of 2020, which is a key driver of volume growth for Hess Midstream. Now turning to Hess Midstream guidance. Hess Midstream's project planning execution in 2019 laid an excellent foundation for another year of strong throughput and financial growth in 2020, driven primarily by the full year of LM4 operations and Hess' continued production growth.

For 2020, we're reaffirming our previously provided guidance. We expect gas gathering volumes to average between 300 million cubic foot per day and 310 million cubic foot per day and gas processing volumes to average between 285 million cubic foot per day and 295 million cubic foot per day. This guidance incorporates the previously announced 45 day TGP maintenance turnaround, which is planned to commence in the third quarter, reducing our annual gas gathering and processing volumes by approximately 30 million cubic foot per day.

First quarter 2020 gas volumes are anticipated to be relatively flat compared to fourth quarter 2019, primarily due to seasonal winter weather conditions. We continue to progress backfilling TGP and as we integrate incremental third-party volumes, we expect to realize further volume growth in the second quarter. We continue to expect third parties to comprise approximately 30% of our total gas gathering and processing volumes, underlying our advantage infrastructure position in the basin. Turning to our crude oil assets. We anticipate continued growth in 2020, driven by increasing Hess production and a stable third-party outlook.

For full year 2020, crude oil gathering volumes are expected to average between 125,000 and 130,000 barrels of oil per day, an increase of approximately 10% compared to 2019. And we anticipate crude terminaling volumes to average between 150,000 and 160,000 barrels of oil per day, an increase of approximately 18% compared to 2019. Third party throughputs are expected to remain at approximately 15% of our total crude oil volumes. First quarter crude oil volumes are expected to be approximately flat with the fourth quarter, reflecting lower planned activity levels due to seasonal winter weather. Turning to our water assets. We continue to expand our gathering footprint in 2020, which will bring more volumes into the system as Hess' production grows.

We anticipate 2020 water gathering volumes to average between 55,000 and 65,000 barrels of water per day, an increase of 46% over full-year 2019, demonstrating the growth potential of these assets. First quarter water volumes are expected to be modestly up from the fourth quarter as we continue our infrastructure build out. For full year 2020, volume increases I've described is a key driver to the expected 32% increase in adjusted EBITDA from 2019, from the midpoint of our 2020 guidance -- to the midpoint of our 2020 guidance. Looking to the longer-term, in this morning's press release, we provided our MVCs for 2022, illustrating the implied growth in system throughputs and our capacity continues to expand.

Turning to Hess Midstream's capital program. Our 2020 capital guidance remains unchanged, comprising approximately $335 million of expansion capital and $15 million of maintenance capital. We plan to invest approximately $155 million in gas processing, which includes the 150 million cubic foot per day expansion of TGP. This increase will take Hess Midstream's overall Bakken gas production capacity to 500 million cubic foot per day.

Expansion activities progressed in the fourth quarter of 2019, as we continue to advance civil construction and fabrication activities. We expect to begin major construction -- major facility construction in 2020. The project is on pace to be completed by mid-2021. In 2020, we also plan to invest $60 million in gas compression and $120 million in oil, gas and water pipelines and well pad interconnects for Hess and third-party customers.

In addition to our capital investment program, we continue to evaluate business development opportunities to further strengthen our portfolio and deliver competitive returns to our shareholders. In summary, for 2020, we continue to remain focused on executing our strategy and leveraging the new Hess Midstream structure to drive long-term and sustainable growth.

I'll now turn the call over to Jonathan to review our financial results.

Jonathan C. Stein -- Chief Financial Officer

Thanks, John, and good afternoon, everyone. As John described, we are proud with the progress we've made in 2019 on executing our strategy. The closing of the HIP acquisition and associated debt transactions in mid-December were significant milestones for us, and and we entered 2020 well positioned for significant adjusted EBITDA growth and increasing free cash flow generation, while maintaining conservative leverage without the need for funding from the equity capital market to deliver our current plan.

With the midpoint of our 2020 guidance representing expected increases relative to our 2019 results of 40% in net income, 32% in adjusted EBITDA and 88% in free cash flow defined as adjusted EBITDA less capital expenditures, we truly have differentiated financial metrics. Hess Midstream has a track record of shareholder value creation to consistent delivery of our financial metrics. Since our IPO, we have delivered our targeted 15% annualized growth in our distribution per unit. And as we described, with the announced HIP transaction, we are maintaining our commitment to our targeted 15% annualized distribution per share growth through 2021.

Consistent with that commitment, on January 27th, we announced our fourth quarter distribution that increased 3.6% quarter-on-quarter and 15% year-on-year. The distribution will be paid on February 14th to holders as of the February 6th record date. Turning to our results. I will compare results from the fourth quarter to the third quarter. For comparability and consistent with the closing of the transaction in the fourth quarter, all prior period results have been restated for the acquisition of HIP.

For the fourth quarter 2019, net income was $75 million compared to $87 million for the third quarter. Fourth quarter net income included approximately $26 million of costs related to our acquisition of HIP. Adjusted EBITDA, excluding the transaction costs for the fourth quarter was $158 million compared to $136 million for the third quarter.

The change in adjusted EBITDA relative to the third quarter was primarily attributable to the following. Our total revenues increased by greater than 15% quarter-on-quarter, including, revenues for our gathering segment increased by approximately $14 million, primarily driven by increasing Hess production.

Revenues for our processing segment increased by approximately $13 million, primarily driven by the ramp up of the LM4 gas processing plant and the continued backfill of TGP. And revenues for our terminaling segment increased by approximately $2 million, primarily driven by increasing Hess production. Total operating expenses including G&A but excluding depreciation and amortization, pass-through and transaction costs, were higher, decreasing adjusted EBITDA by approximately $6 million, including higher seasonal maintenance activity during the period of approximately $3 million and higher seasonal overhead of approximately $3 million.

LM4 processing fees, net of our proportional share of earnings and depreciation reduced adjusted EBITDA by approximately $1 million, resulting in fourth quarter adjusted EBITDA of $158 million, a 16% increase relative to the third quarter. Fourth quarter 2019 maintenance capital expenditures were approximately $2 million and net interest, excluding amortization of deferred finance costs was $16 million. The result was that distributable cash flow was approximately $140 million for the fourth quarter of 2019, covering our distribution by approximately 1.2 times.

Expansion capital expenditures in the fourth quarter were $107 million. At quarter end, debt was $1.8 billion, representing approximately three times leverage on a 2019 basis. Turning to our recalculation process. At the end of 2019, we completed our nomination process with Hess and updated our tariff rates for 2020 and all forward years.

As for cycles, the nomination process considered changes in actual and forecasted volumes and capex to maintain our contractual targeted return on capital deployed. Tariffs increased in 2020, primarily from recovering reduced revenues from lower volumes and generating incremental revenues on higher capex during 2019, as a result of the delay in LM4, which we had proactively integrated into our contract structure.

In our earnings release, we have also provided MVCs for the years 2020 through 2022. For 2022, MVCs were newly established, providing line of sight to potential long-term growth in system throughput. For example, our 2022 MVCs for gas processing imply an approximate 18% annualized growth rate in nominated volumes from our actual 2019 volumes. Together with our gas gathering volumes, these gas assets comprised approximately 70% of our revenues. Our updated MVCs provide a solid baseline to our volume growth and are a key indicator of our financial strength beyond 2021.

Turning to 2020, in the first quarter, we expect net income to be approximately $110 million to $120 million and adjusted EBITDA to be approximately $175 million to $185 million, an approximate 14% increase in EBITDA at the midpoint, relative to the fourth quarter. First quarter maintenance capital expenditures and net interest, excluding amortization of deferred finance costs are expected to be approximately $30 million, resulting in an expected DCF of approximately $145 million to $155 million, delivering distribution coverage at the midpoint of the range of approximately 1.2 times.

Relative to our fourth quarter 2019 results, the expected increase in adjusted EBITDA is primarily driven by higher tariff rates and seasonally lower opex. Looking through the rest of 2020, we expect to achieve our 15% distribution growth target with approximately 1.2 times distribution coverage, with approximately 85% of our expected revenues protected by MVCs and maintaining an adjusted EBITDA margin, consistent with our historical margin of greater than 75%.

As John described, we anticipate commencing a maintenance turnaround at TGP in the third quarter, during which time, we will incur higher operating expenses and maintenance capital and receive lower volumes and revenues, resulting in lower distribution coverage during the period of the turnaround. The turnaround is expected to last 45 days and has been fully incorporated into our 2020 volume and financial guidance already.

As a reminder, Hess Midstream will receive MVC payments during the turnaround. For 2020, overall, we are reaffirming our financial guidance. Full-year net income is expected to be in the range of $440 million to $480 million. Adjusted EBITDA is expected to be in the range of $710 million to $750 million, which represents at the midpoint an approximate 32% increase over our 2019 results.

This annual EBITDA increase is primarily driven by higher annual volumes and MVCs, as well as higher tariff rates, driven by the annual inflation escalator, and the rate redetermination process offset by higher opex, primarily from the planned TGP turnaround. Maintenance capital and cash interest are projected to total approximately $110 million for the full year 2020.

Distributable cash flow for 2020 is expected to be in the range of $600 million to $640 million. Putting it all together, we are well positioned for continued and sustainable growth. Compared to 2019 results, our 2020 guidance represents expected increases of 45% in net income, 32% in adjusted EBITDA and 88% in free cash flow. Even with our continued gas processing and compression investment, we expect free cash flow of approximately $380 million at the midpoint of our 2020 adjusted EBITDA and capex guidance.

Looking forward to 2021, we expect approximately 25% annualized growth in adjusted EBITDA relative to 2019 and approximately 75% free cash flow conversion, funding both our expansion capex and distributions with DCF. Longer term, our updated MVCs highlight the continued organic growth we expect to continue in 2022.

We have transitioned to a structure that is more attractive for broad investor participation and it highlights Hess Midstream's unique proposition of industry-leading growth, best-in-class contract structure, strong free cash flow generation and conservative balance sheet.

This concludes my remarks. We'll be happy to answer any questions.I will now turn the call over to the operator.

Questions and Answers:
Operator

[Operator Instructions]. Your first question comes from the line of Phil Stuart from Scotiabank. Please proceed.

Philip Stuart -- Scotiabank -- Analyst

Good morning, guys. Appreciate the commentary and the update on the 2022 MVCs. I wonder if we could start off with kind of the $120 million in ongoing or sustaining capex that you all identified kind of in the 2020 budget. As you look out to 2021 and 2022, as Hess starts to ramp down activity and kind of get into that maintenance mode, how do you see that bucket of capex trending kind of in the outer years?

Does it decline materially, or does it kind of sustain in that kind of $120 million range?

Jonathan C. Stein -- Chief Financial Officer

Yeah. So, I think the way to think about our run rate in terms of expansion capital going forward is that as we look forward, we'll be ramping up the compression expansion projects and TGP expansion primarily completed in 2020 and into 2021. So then based on that, we'll be able to achieve our growth targets based on lower ongoing capital, which is basically the -- think of that $120 million, which is Hess and third-party interconnects representing our ongoing capital. Together with, if you look at this year maintenance capital of $15 million, that gets you to total ongoing capital of about $135 million.

So without giving a specific number, certainly, if you look historically, our ongoing capital has been together with maintenance in that kind of range. For comparison, our annual depreciation has been approximately $150 million. So that's also consistent with that level of ongoing capex. So, you can kind of think of that level of where we are, kind of going forward looking at that bucket, plus maintenance and then certainly comparing it to our depreciation gives you some feel of what we expect going forward.

Philip Stuart -- Scotiabank -- Analyst

Great. I appreciate the commentary there. And then on TGP, do you have an estimate of how much capex is that project will include for 2021, understanding there is still a lot of moving pieces there? But just kind of curious if you can identify how much capex there is associated with that project kind of currently planned for 2021?

John A. Gatling -- President and Chief Operating Officer

Yeah. So, we're not providing any real direction on 2021 capex for the expansion, but a significant portion of the facility construction work will be completed this year with some activities continuing into 2021. I would say the material amount of spend for the project will be primarily in 2020, but there will be some residual spend in 2021 as we kind of wrap up activities.

Philip Stuart -- Scotiabank -- Analyst

Okay. Great. That makes sense. And then, I wonder if we could maybe step back a bit and talk about kind of the distribution policy. Post 2021, obviously, you all have laid out a good plan of 15% kind of annual growth through 2021. But just kind of thinking 2022 and beyond, assuming no major third party acquisitions, how should we think about what the governors are to distribution growth kind of beyond 2021?

I'm thinking about it in terms of obviously understanding that you guys probably want to maintain 1.2 times distribution coverage -- at least 1.2 times. But also in terms of leverage, I guess, as I look at it, it seems like net debt to EBITDA is going to be improving kind of throughout 2021, kind of maybe getting close to that 2.5 times range. Is that 2.5 times range kind of a comfortable spot for you all longer term, as we think about potential governors to distribution growth? I know you've set the three times target but just kind of curious how you guys are thinking about things longer term?

Jonathan C. Stein -- Chief Financial Officer

Sure. So as we look at how we've always set our distribution growth, it's been a level that we can deliver consistently and with the transaction on new platform, that isn't changing. So, our 2022 DPS growth will be at a level that's consistent with both our organic growth and also our financial targets and metrics, including the ones you mentioned.

So starting on the organic growth side, if you look at our MVCs, they provide some transparency to our organic growth there with the gas volumes, which represent 70% of our revenues growing approximately 15% from 2021 to 2022. But we'll also be consistent, as you said, with our financial metrics, including, yes, the 1.2 times coverage and our targeted three times leverage target, even though we will be naturally delevering absent additional opportunities, certainly that provides us the flexibility to do additional investments as we've talked about.

So with our assets being extremely cash flow generative and the ability to -- we expect to really be able, as I mentioned, to fund our capex and distributions with DCF by 2021, we'll have significant financial flexibility. But we're also going to remain disciplined to set our distribution growth relative to organic growth and also to our financial targets. And that's the approach we've taken historically and that's what you should expect certainly going forward. As we get closer to 2022, obviously, we'll provide more details. But in terms of our framework of thinking about how we are thinking about it, it will be using that financial flexibility but also remain disciplined to our organic growth and our financial metrics.

Philip Stuart -- Scotiabank -- Analyst

Okay. Great. And then, I guess, one last quick one from me. Obviously, with the turnaround in 3Q of this year, when we think about 4Q kind of gas gathering and processing volumes, will those kind of be able to ramp back up to similar levels to maybe 2Q'20? Or will there be kind of a slower ramp process, I guess in 4Q, coming off of the turnaround in 3Q?

John A. Gatling -- President and Chief Operating Officer

No, I would say there will definitely be a ramp up. I think it's just a natural process of bringing the system back up and kind of working through all of that, but the available gas is already there. So, we would expect to see the volume to come into the system fairly quickly. But again, we want to be a bit cautious here and make sure that we do it the right way.

It's a big project for us to actually do the turnaround and then also to tie in some critical aspects of the overall expansion. So, I would say that we will definitely see a ramp post debottlenecking and turnaround, but that ramp will really be kind of primarily done in the third quarter, but there could be a little bit of impact in fourth. So, I think we'll just -- we'll cautiously look at it and as we get closer to the actual turnaround time, we'll provide a little bit more transparency into that.

Philip Stuart -- Scotiabank -- Analyst

All right. Great. Thanks, guys. That's it from me.

John A. Gatling -- President and Chief Operating Officer

Thank you.

Operator

Thank you. Our next question comes from Jeremy Tonet from J.P. Morgan. Please go ahead.

Rahul Krotthapalli -- J.P. Morgan -- Analyst

Good afternoon, guys. This is Rahul on for Jeremy. I just have one quick question here. Is there a way to break out the 25% y-o-y EBITDA growth between the volumes and the fee uplift, and like some more granularity on the rate recalculation and what the drivers you have for the growth would be appreciated. Thank you.

Jonathan C. Stein -- Chief Financial Officer

And just to be clear, you're asking for full year, going from '19 to '20, right?

Rahul Krotthapalli -- J.P. Morgan -- Analyst

Yeah.

Jonathan C. Stein -- Chief Financial Officer

Yeah. So that increase is about 32% year-on-year to the midpoint. Primarily two-thirds of that is actually volume growth, that's coming from organic gas line growth in gas processing as we have now LM4 online. It continue to backfill TGP. We also have organic oil growth as Hess continues to ramp up production toward the 200,000 BOE per day.

We also have higher MVCs, which have increased as the development plan has shifted and particularly with the turnaround, providing some MVC revenue there as well. The rest of the increase is really increased tariff rates, as you mentioned, offset by higher annual costs. The increased tariff rates are really driven by revenue recovery for more volumes and return on the incremental capital that we spent in 2019 from the delay in LM4 and therefore the rate went up as part of that rate redetermination process at the end of the year. There is also the annual inflation escalators, which has an impact on increased rates and then that's offset by higher opex, which particularly -- primarily is driven by just the turnaround costs that we expect during the year.

Rahul Krotthapalli -- J.P. Morgan -- Analyst

Understood. That's helpful color. Thanks, guys.

Operator

Thank you. Our next question comes from Spiro Dounis from Credit Suisse. Please go ahead.

Douglas Irwin -- Credit Suisse -- Analyst

Hey, this is Doug Irwin on for Spiro. Thanks for the question. Just real quick on the 2022 MVCs. I guess, kind of on its face, the crude numbers imply a decline and I appreciate it's based on a different percent of the nomination versus 2021. But can you maybe just help us think about it on an apples-to-apples basis and kind of what that means for crude growth longer term?

Jonathan C. Stein -- Chief Financial Officer

So. Yeah, so let's talk about how the MVCs -- again, as I mentioned in the script, MVCs provide a line of sight to continued growth. So, I'll give you the mechanics and then John can talk about kind of broad volume growth and some of the drivers. So in general, the 2021 MVCs, as we mentioned in our press release are approximately 25% or higher. It's also the contract mechanics that MVCs is higher but they can go lower.

So particularly with the oil gathering MVCs, that's the sum of multiple subsystems. So with the 2021 MVC above 85% compared to the 2022 MVC, we'd see that oil gathering volumes will be approximately flat year-on-year. On the other side on gas processing and gathering MVCs, the 2022 MVCs would imply continued growth as we complete the TGP expansion. Again, as I mentioned, the gas volume growth, which is 70% of our revenues, from 2021 to 2022 is about 15%.

So, all of our businesses, really will be looking -- as we look forward, the MVCs really imply that growth that we talked about all supported of course by the free cash flow positive and free cash flow generation that we've talked about and the ability to completely fund our capex and distribution. So relative to those metrics and really -- and the MVCs provide visibility to this, really in a very differentiated position in terms of our long-term outlook and the unique platform that we have.

John A. Gatling -- President and Chief Operating Officer

Yeah. And I would just build on Jonathan's point there. I think just one thing to look at gas versus oil. Gas, you have to get it in pipe, I mean there's flaring constraints in the basin. And so you've got to get it in pipe and so that's been a priority for producers in the basin and also other midstream companies providing services to producers.

So that's had us, allowed us to capture more third-party volumes and will actually be the things that will help us fill TGP going long-term. On the oil side and on the gas, just a reminder that approximately 30% of our gas system is third parties. On the oil side, -- sorry, third parties represent about 15% of our total crude oil system. So, there is definitely some run room there on the third-party side to capture additional oil opportunities.

There is still a focus on getting more barrels off-trucks and into the gathering system, and then, obviously, with our flexible export options, we've got the terminal north of the river, we got the rail terminal and then we got the terminal south of river. We essentially can access all markets from a crude perspective. And so we see that as a differentiator for us in the basin. That will definitely attract more crude volumes, third-party crude volumes into our gathering and terminaling system. So while right now, we're saying that the number is kind of at that 15%, we see some potential growth opportunities there for us.

Douglas Irwin -- Credit Suisse -- Analyst

Got it. Great. That's all from me. Thanks.

John A. Gatling -- President and Chief Operating Officer

Thank you.

Operator

[Operator Closing Remarks].

Duration: 34 minutes

Call participants:
Jennifer Gordon -- Vice President of Investor Relations

John A. Gatling -- President and Chief Operating Officer

Jonathan C. Stein -- Chief Financial Officer

Philip Stuart -- Scotiabank -- Analyst

Rahul Krotthapalli -- J.P. Morgan -- Analyst

Douglas Irwin -- Credit Suisse -- Analyst"""

In [None]:
# Prompt wording idea that is not part of the prompt built below:
# Justify your report with examples, and don't be afraid to be verbose.

# Keep only the first 10,000 characters of the transcript to avoid memory issues.
transcript = example_transcript[:10000]

prompt = f"""You are a financial analyst who specializes in analyzing earnings call transcripts. Report your analysis for the given transcript and include the following sections:
1. An analysis on whether the vocabulary is abstract and vague, or concrete. Include a rating from 1 to 5, where 1 is "vague" and 5 is "concrete".
2. An analysis on whether the transcript is informative and insightful, or superficial. Include a rating from 1 to 5, where 1 is "superficial" and 5 is "informative".

Be creative in your reporting and provide examples to justify your ratings.
    
### Transcript:
{transcript}
"""

# Single-turn chat: one user message carrying the whole prompt.
messages = [
    {"role": "user", "content": prompt}, # "mistralai/Mistral-7B-Instruct-v0.2"
    # {"role": "user", "content": prompt2},
    # {"role": "user", "content": prompt3},
    # ...
]

# generate the response in inference mode (no gradients are tracked)
with torch.no_grad():
    # The model is loaded with device_map="auto", so send the inputs to the
    # device the model reports instead of hard-coding "cuda".
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
    generated_ids = model.generate(encodeds, max_new_tokens=8000, do_sample=True, typical_p=0.9, pad_token_id=tokenizer.eos_token_id)

# decode the response
# The generated sequence starts with the prompt tokens, so slice them off and
# decode only the newly generated tokens. This avoids splitting the decoded
# text on the "[/INST]" marker, which breaks if the reply contains that marker.
prompt_length = encodeds.shape[-1]
q2 = tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)
final_output = q2.lstrip()

# clean up memory
del encodeds, generated_ids, q2, prompt
gc.collect()



In [None]:
# Show the parsed model response as this cell's output.
final_output

# Method 2 - DSPy version

In [None]:
# pip install dspy-ai
import gc
import dspy
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [None]:
from transformers import BitsAndBytesConfig

def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Iterates over ``model.named_parameters()`` and sums ``param.numel()`` for
    all parameters and, separately, for those with ``requires_grad`` set.

    Args:
        model: any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and ``requires_grad``.

    Returns:
        None. The counts and the trainable percentage are printed to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # A model without parameters would otherwise raise ZeroDivisionError.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )


In [None]:


# language model
import os

# SECURITY: never commit a Hugging Face access token to source control.
# The token is read from the HF_TOKEN environment variable instead; it is None
# when the variable is unset. A token that was previously hard-coded in this
# notebook must be treated as leaked and revoked in the Hugging Face settings.
access_token = os.environ.get("HF_TOKEN")
model_name = "EleutherAI/gpt-neo-125m"
# model_name = "meta-llama/Meta-Llama-3-8B-Instruct" # 8K context window
# model_name = "mistralai/Mistral-7B-Instruct-v0.2" # 32K context window
llm = dspy.HFModel(model=model_name, hf_device_map='auto', token=access_token)
# cap the number of tokens generated per call
llm.kwargs['max_new_tokens']=200
# llm.kwargs['do_sample']=True
# llm.kwargs['typical_p']=0.9

In [None]:
# Show the model object currently held by the dspy LM wrapper as this cell's output.
llm.model

In [None]:
############## This is how you can use quantization with dspy
# Drop the reference to the model currently held by `llm` so that it can be
# replaced below by a quantized copy loaded directly with transformers.
llm.model=None # clear out the model
gc.collect() # clear out the memory
##############

# specify how to quantize the model. See https://huggingface.co/docs/transformers/en/main_classes/quantization
quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="float16",
)

# load the model and tokenizer from the Hugging Face Hub (huggingface.co)
llm.model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config, device_map="auto")
# NOTE(review): this `tokenizer` variable is not attached to `llm` anywhere in this cell — confirm that is intended.
tokenizer = AutoTokenizer.from_pretrained(model_name)

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# configure the model for training with LoRA. See https://huggingface.co/docs/peft/en/package_reference/lora
config = LoraConfig(
    r=16,
    lora_alpha=32,
    
    target_modules=["k_proj", "v_proj", "q_proj", "out_proj"], # module names used for "EleutherAI/gpt-neo-125m"
    lora_dropout=0.05,
    bias="all", #"none", "all", "lora_only"
    task_type="CAUSAL_LM",
)

# prepare the quantized model for k-bit training, then wrap it with the LoRA config
llm.model = prepare_model_for_kbit_training(llm.model)
llm.model = get_peft_model(llm.model, config)
# report how many parameters are trainable after wrapping
print_trainable_parameters(llm.model)
# register `llm` as the language model in the dspy settings
dspy.settings.configure(lm=llm)

In [None]:


class TranscriptAnalyst(dspy.Module):
    """Three-step dspy program that analyses an earnings call transcript.

    Two predictors each take the transcript and produce one analysis
    (vague-vs-concrete vocabulary, insightful-vs-superficial content).
    A chain-of-thought step then combines both analyses into a final report.
    """

    def __init__(self):
        """Create the two analysis predictors and the final report step."""
        super().__init__()
        self.vague_concrete_analyzer = dspy.Predict(
            "transcript -> linguistic_analysis_on_vague_versus_concrete_vocabulary_with_1_to_5_rating"
        )
        self.insightful_superficial_analyzer = dspy.Predict(
            "transcript -> linguistic_analysis_on_insightful_versus_superficial_content_with_1_to_5_rating"
        )
        self.final_report = dspy.ChainOfThought(
            "vague_concrete_analysis, insightful_superficial_analysis -> detailed_report_on_negative_linguistic_characteristics_of_transcript"
        )

    # Ideas for additional data-fetching helpers that forward() could use
    # (not implemented yet):
    #   get_company_news(self, ticker)
    #   get_financial_statements(self, ticker)
    #   rsi(self, ticker)

    def forward(self, transcript):
        """Run both analyses on ``transcript`` and return the combined report.

        Args:
            transcript: transcript text handed to both analysis predictors.

        Returns:
            Whatever ``self.final_report`` returns for the two analyses.
        """
        # Inputs must be passed explicitly by keyword (transcript=transcript),
        # not positionally.
        vocabulary_prediction = self.vague_concrete_analyzer(transcript=transcript)
        content_prediction = self.insightful_superficial_analyzer(transcript=transcript)

        # Pull the generated output field out of each prediction.
        vocabulary_text = (
            vocabulary_prediction.linguistic_analysis_on_vague_versus_concrete_vocabulary_with_1_to_5_rating
        )
        content_text = (
            content_prediction.linguistic_analysis_on_insightful_versus_superficial_content_with_1_to_5_rating
        )

        return self.final_report(
            vague_concrete_analysis=vocabulary_text,
            insightful_superficial_analysis=content_text,
        )

In [None]:
# Instantiate the dspy program and run it on the first 1,000 characters of the sample transcript.
transcript_analyst = TranscriptAnalyst()
output = transcript_analyst(transcript=example_transcript[:1000])