In [1]:
import pandas as pd
import numpy as np

In [2]:
companies_df = pd.read_csv('oil_companies.csv')
oilTrend_df = pd.read_csv('DA_OilTrends - Oil_Companies.csv', dtype="str")

In [3]:
### List of industries we believe are highly dependent on crude oil

oil_industries = list(set(companies_df["Industry"]))
for industry in oil_industries:
    print(industry)

Oil Equipment & Services
Integrated Oil & Gas
Fibres & Plastics
Oil Marketing & Distribution
Plastic Products
Refineries/ Petro-Products
Petrochemicals
Agrochemicals


In [4]:
### List of companies under each sector available for trend analysis.
industry_company_map = dict()
# list of companies with no "NOT AVAILABLE"
avail_comp = [i for i in list(oilTrend_df.columns) if "NOT AVAILABLE" not in list(oilTrend_df[i])]
for oil_industry in oil_industries:
    df = companies_df[companies_df.Industry.str.contains(oil_industry)]
    industry_company_map[oil_industry] = list(set(list(df["Security Id"])) & set(avail_comp))
    
for i in industry_company_map.keys():
    l = industry_company_map[i]
    #Industry, Number of companies in that industry to analyse
    print(i, len(l))
    for j in l:
        print(j)
    print("\n")

Oil Equipment & Services 4
OILCOUNTUB
ALPHAGEO
DOLPHINOFF
ASIANOI


Integrated Oil & Gas 2
RELIANCE
DEEPIND


Fibres & Plastics 7
AMCOIND
POLYLINK
DVL
VINYLINDIA
HINFLUR
INDIANACRY
JBFIND


Oil Marketing & Distribution 8
CONFIPET
AEGISLOG
IOC
TIDEWATER
GULFPETRO
CASTROLIND
GOCLCORP
PETRONET


Plastic Products 38
ARROWGREEN
CAPRIHANS
KRITIIND
FINOLEXIND
MAHEPC
JISLDVREQS
JISLJALEQS
ASTRAL
SIGNETIND
APOLLOPIPES
RUNGTAIR
RDBRL
TAINWALCHM
SRIKPRIND
ASHISHPO
BLOOM
HINDADH
WIMPLAST
ECOPLAST
ACRYSIL
NILKAMAL
TEXMOPIPES
TIMETECHNO
INTLCONV
DUTRON
NATPLAS
KKALPANAIND
BIOPAC
SUPREMEIND
MACPLASQ
FLEXITUFF
KISAN
RESPONIND
PRIMAPLA
TIJARIA
FENOPLAS
KINGFA
TPLPLAST


Refineries/ Petro-Products 7
CHENNPETRO
GOACARBON
BPCL
SOTL
HINDPETRO
MRPL
PANAMAPET


Petrochemicals 5
SUPPETRO
DCW
AGARIND
MANALIPETC
RAIN


Agrochemicals 19
BAYERCROP
INSECTICID
JUBLINDS
AIMCOPEST
MONSANTO
BHARATRAS
ASTEC
PHYTO
NACLIND
EXCELCROP
RALLIS
SUCROSA
ATUL
KILPEST
PUNJABCHEM
UPL
DHANUKA
BHAGCHEM
PIIND




# Analyse by industry

We aim to analyse a few companies from an industry to check if their trend is correlated with the trend of crude oil.

We have identified three types of trends. Namely, up (1), down (-1) and no (0).

### Analyse by hypothesis for each trend

Suppose we have a hypothesis, such that

Crude oil trend    | Trend of an ideal company belonging to industry X

    0       |       1
    1       |       -1
    -1      |       1
    
Note that there are 3^3 = 27 such hypotheses possible.

We generate a score to check what percentage of the actual trend observation adheres to our hypothesis.
The score for the industry is computed as the average score of all companies in that industry.

The score roughly translates to the probability that our hypothesis holds good.
A threshold of 0.7 has been set and any (hypothesis,industry_score) above 0.7 would be an insight.



In [8]:
### Analyse by industry
import itertools

def correlation_trend_analyse(industry, period, threshold = 0.7):
    print(industry, len(industry_company_map[industry]))
    df = oilTrend_df[['Year', 'CRUDEOIL']+industry_company_map[industry]]

    l = [0, 1, -1]
    patterns = [p for p in itertools.product(l, repeat=3)]

    if (period == 'yearly'):
        dfYear = df.iloc[:7][:]
    elif (period == 'half-yearly'):
        dfYear = df.iloc[7:21][:]
    elif (period == 'quarterly'):
        dfYear = df.iloc[21:49][:]
    elif (period == 'monthly'):
        dfYear = df.iloc[49:133][:]
        
    companies = list(dfYear.columns)[2:]
    crude_oil_trend = list(dfYear['CRUDEOIL'])
    #print(crude_oil_trend)
    for pattern in patterns:
        #print(pattern)
        trend_map = {'0' : str(pattern[0]), '1' : str(pattern[1]), '-1' : str(pattern[-1])}
        industry_trend_score = 0
        for company in companies:
            #print(company)
            trend_score = 0
            expected_trend = [trend_map[i] for i in crude_oil_trend]
            actual_trend = list(dfYear[company])
            span = len(actual_trend)
            for j in range(span):
                if actual_trend[j] == expected_trend[j]:
                    trend_score += 1
            industry_trend_score += trend_score/span
            #trend_score = trend_score/span
            #if trend_score > 0.7:
            #    print(company, trend_score)
        industry_trend_score = industry_trend_score/len(companies)
        if (industry_trend_score >= threshold):
            print(pattern, industry_trend_score)
        
for industry in industry_company_map.keys():
    print("Yearly")
    correlation_trend_analyse(industry, period = "yearly")
    print("Half Yearly")
    correlation_trend_analyse(industry, period = "half-yearly")
    print("Quarterly")
    correlation_trend_analyse(industry, period = "quarterly")
    print("Monthly")
    correlation_trend_analyse(industry, period = "monthly")
    


Yearly
Oil Equipment & Services 4
Half Yearly
Oil Equipment & Services 4
Quarterly
Oil Equipment & Services 4
Monthly
Oil Equipment & Services 4
Yearly
Integrated Oil & Gas 2
Half Yearly
Integrated Oil & Gas 2
Quarterly
Integrated Oil & Gas 2
Monthly
Integrated Oil & Gas 2
Yearly
Fibres & Plastics 7
Half Yearly
Fibres & Plastics 7
Quarterly
Fibres & Plastics 7
Monthly
Fibres & Plastics 7
Yearly
Oil Marketing & Distribution 8
Half Yearly
Oil Marketing & Distribution 8
Quarterly
Oil Marketing & Distribution 8
Monthly
Oil Marketing & Distribution 8
Yearly
Plastic Products 38
Half Yearly
Plastic Products 38
Quarterly
Plastic Products 38
Monthly
Plastic Products 38
Yearly
Refineries/ Petro-Products 7
Half Yearly
Refineries/ Petro-Products 7
Quarterly
Refineries/ Petro-Products 7
Monthly
Refineries/ Petro-Products 7
Yearly
Petrochemicals 5
Half Yearly
Petrochemicals 5
Quarterly
Petrochemicals 5
Monthly
Petrochemicals 5
Yearly
Agrochemicals 19
Half Yearly
Agrochemicals 19
Quarterly
Agrochemic

In [6]:
df.head(n=133)

Unnamed: 0,Security Code,Security Id,Security Name,Status,Group,Face Value,ISIN No,Industry,Instrument
175,500027,ATUL,ATUL LTD.,Active,A,10.0,INE100A01010,Agrochemicals,Equity
176,500286,MONTA,MONTARI INDUSTRIES LTD.,Delisted,Z,10.0,INE407C01015,Agrochemicals,Equity
177,500355,RALLIS,RALLIS INDIA LTD.,Active,A,1.0,INE613A01020,Agrochemicals,Equity
178,503776,MODIPON,MODIPON LTD.,Active,X,10.0,INE170C01019,Agrochemicals,Equity
179,506129,NGRJFIN-B,NAGARJUNA FINANCE LTD.,Delisted,P,10.0,,Agrochemicals,Equity
180,506265,ARLABS,ARLABS LTD.,Delisted,P,10.0,,Agrochemicals,Equity
181,506285,BAYERCROP,BAYER CROPSCIENCE LTD.,Active,A,10.0,INE462A01022,Agrochemicals,Equity
182,506580,PBGLOBAL,PB Global Ltd,Suspended,P,10.0,INE615W01011,Agrochemicals,Equity
183,506618,PUNJABCHEM,PUNJAB CHEMICALS AND CROP PROTECTION LTD.-$,Active,B,10.0,INE277B01014,Agrochemicals,Equity
184,507717,DHANUKA,DHANUKA AGRITECH LTD.,Active,B,2.0,INE435G01025,Agrochemicals,Equity


### Analyse by hypothesis for individual trend

Here the assumption is for example, whenever crude oil trend is 0, the company trend would be 1 and we are not worried about the cases where crude oil trend is 1 or -1. So only one trend of crude oil is focussed.

There are 9 possible hypotheses.
When crude_oil_trend = 0, company_trend = 0 or 1 or -1
When crude_oil_trend = 1, company_trend = 0 or 1 or -1
When crude_oil_trend = -1, company_trend = 0 or 1 or -1

Here, confidence score is used as a metric. If the confidence score of the company_trend given the crude_oil trend is greater than the threshold, then we have an insight.



In [7]:
import itertools

def correlation_trend_analyse_oneTrendAssumption(industry, period, threshold = 0.7):
    print(industry, len(industry_company_map[industry]))
    df = oilTrend_df[['Year', 'CRUDEOIL']+industry_company_map[industry]]

    l = ['0', '1', '-1']
    trend_patterns = {'0' : l, '1' : l, '-1' : l}
    #patterns = [p for p in itertools.product(l, repeat=3)]

    if (period == 'yearly'):
        dfYear = df.iloc[:7][:]
    elif (period == 'half-yearly'):
        dfYear = df.iloc[7:21][:]
    elif (period == 'quarterly'):
        dfYear = df.iloc[21:49][:]
    elif (period == 'monthly'):
        dfYear = df.iloc[49:133][:]
        
    companies = list(dfYear.columns)[2:]
    crude_oil_trend = list(dfYear['CRUDEOIL'])
    
    for key in trend_patterns.keys():
        #print("Crude oil trend ", key)
        for i in l:
            #print("Company trend ", i)
            industry_trend_score = 0
            for company in companies:
                trend_score = 0
                #print(crude_oil_trend)
                actual_trend = list(dfYear[company])
                span = len(actual_trend)
                #print(actual_trend)
                #expected_trend = [x if x!=i else i for x in actual_trend]
                for k in range(span):
                    if (actual_trend[k] == i and crude_oil_trend[k] == key):
                        trend_score += 1
                    elif (actual_trend[k] != i and crude_oil_trend[k] == key):
                        trend_score += 0
                    else:
                        trend_score += 0
                
                #print(trend_score/crude_oil_trend.count(key))
                industry_trend_score += trend_score/span
            industry_trend_score = industry_trend_score/len(companies)
            if (industry_trend_score >= threshold):
                print(key,i, industry_trend_score)

for industry in industry_company_map.keys():
    print("Yearly")
    correlation_trend_analyse_oneTrendAssumption(industry, period = "yearly")
    print("Half Yearly")
    correlation_trend_analyse_oneTrendAssumption(industry, period = "half-yearly")
    print("Quarterly")
    correlation_trend_analyse_oneTrendAssumption(industry, period = "quarterly")
    print("Monthly")
    correlation_trend_analyse_oneTrendAssumption(industry, period = "monthly")

    

Yearly
Oil Equipment & Services 4
Half Yearly
Oil Equipment & Services 4
Quarterly
Oil Equipment & Services 4
Monthly
Oil Equipment & Services 4
Yearly
Integrated Oil & Gas 2
Half Yearly
Integrated Oil & Gas 2
Quarterly
Integrated Oil & Gas 2
Monthly
Integrated Oil & Gas 2
Yearly
Fibres & Plastics 7
Half Yearly
Fibres & Plastics 7
Quarterly
Fibres & Plastics 7
Monthly
Fibres & Plastics 7
Yearly
Oil Marketing & Distribution 8
Half Yearly
Oil Marketing & Distribution 8
Quarterly
Oil Marketing & Distribution 8
Monthly
Oil Marketing & Distribution 8
Yearly
Plastic Products 38
Half Yearly
Plastic Products 38
Quarterly
Plastic Products 38
Monthly
Plastic Products 38
Yearly
Refineries/ Petro-Products 7
Half Yearly
Refineries/ Petro-Products 7
Quarterly
Refineries/ Petro-Products 7
Monthly
Refineries/ Petro-Products 7
Yearly
Petrochemicals 5
Half Yearly
Petrochemicals 5
Quarterly
Petrochemicals 5
Monthly
Petrochemicals 5
Yearly
Agrochemicals 19
Half Yearly
Agrochemicals 19
Quarterly
Agrochemic