In [1]:
import io
import requests
import zipfile

# Fama French Industries

# get different groups txt, if needed
# Each <group>.zip is fetched from the URL below and unpacked into the current
# working directory (extractall() is called without a target path).
groups = ['Siccodes5', 'Siccodes10', 'Siccodes12', 'Siccodes17', 'Siccodes30', 'Siccodes38', 'Siccodes48', 'Siccodes49']
url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
for s in groups:
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url+s+'.zip', timeout=60)
    # Fail on HTTP errors here; otherwise an error page would be handed to
    # zipfile and surface as a confusing BadZipFile.
    r.raise_for_status()
    # The context manager closes the archive once its members are extracted.
    with zipfile.ZipFile(io.BytesIO(r.content)) as f:
        f.extractall()

# Set up a dictionary with keys = industry name, values = corresponding intervals of SIC codes
# Inputs: Siccodes5.txt, Siccodes10.txt, Siccodes12.txt, Siccodes17.txt, Siccodes30.txt, Siccodes38.txt,
#         Siccodes48.txt, Siccodes49.txt
def sic2industry(txt):
    """Parse a ``SiccodesNN.txt`` file into an industry -> SIC-interval table.

    Every non-empty line is stripped and classified by the length of its
    first whitespace-separated token:

    * fewer than 3 characters: a header line. All tokens after the second
      one are joined with single spaces to form the industry name, which
      starts a new (empty) entry.
    * 3 or more characters: a range line such as ``0100-0199 ...``. Dashes
      are treated as separators and the first two resulting tokens are
      converted to ``int`` as ``[lower, upper]``, appended to the most
      recently seen industry.

    Parameters
    ----------
    txt : str
        Path of the text file to parse; it is opened as UTF-8.

    Returns
    -------
    dict
        ``{industry name: [[lower, upper], ...]}`` in file order.

    Raises
    ------
    ValueError
        If a range line appears before any header line, or if a bound is
        not an integer.
    """
    # The context manager closes the file even if reading fails.
    with open(txt, 'r', encoding='utf-8') as f:
        lines = [raw.strip() for raw in f]

    industries = {}
    industry = None  # name of the header most recently seen, if any
    for lineno, line in enumerate(lines, start=1):
        if not line:
            continue

        # split() with no argument tolerates tabs and runs of spaces.
        tokens = line.split()
        if len(tokens[0]) < 3:
            # Header line: skip the first two tokens, keep the rest as the name.
            industry = ' '.join(tokens[2:])
            industries[industry] = []
        else:
            if industry is None:
                raise ValueError(
                    'SIC range on line %d of %s appears before any industry header'
                    % (lineno, txt)
                )
            bounds = line.replace('-', ' ').split()
            industries[industry].append([int(bounds[0]), int(bounds[1])])

    return industries

# Functions for FFIndustry
def _ff_lookup(sic, txt):
    """Return the first industry in file `txt` with an interval containing `sic`.

    The file is parsed with sic2industry on every call, then industries are
    scanned in file order; interval bounds are inclusive. Returns 'Other'
    when no interval matches.
    """
    table = sic2industry(txt)
    for industry, intervals in table.items():
        for interval in intervals:
            if sic >= interval[0] and sic <= interval[1]:
                return industry
    return 'Other'

# NOTE: each ffN call below re-reads its text file. When classifying many SIC
# codes, parse the file once with sic2industry and reuse the resulting table.

# ff5
def ff5(sic):
    """Industry name for `sic` per 'Siccodes5.txt', or 'Other' if unmatched."""
    return _ff_lookup(sic, 'Siccodes5.txt')

# ff10
def ff10(sic):
    """Industry name for `sic` per 'Siccodes10.txt', or 'Other' if unmatched."""
    return _ff_lookup(sic, 'Siccodes10.txt')

# ff12
def ff12(sic):
    """Industry name for `sic` per 'Siccodes12.txt', or 'Other' if unmatched."""
    return _ff_lookup(sic, 'Siccodes12.txt')

# ff17
def ff17(sic):
    """Industry name for `sic` per 'Siccodes17.txt', or 'Other' if unmatched."""
    return _ff_lookup(sic, 'Siccodes17.txt')

# ff30
def ff30(sic):
    """Industry name for `sic` per 'Siccodes30.txt', or 'Other' if unmatched."""
    return _ff_lookup(sic, 'Siccodes30.txt')

# ff38
def ff38(sic):
    """Industry name for `sic` per 'Siccodes38.txt', or 'Other' if unmatched."""
    return _ff_lookup(sic, 'Siccodes38.txt')

# ff48
def ff48(sic):
    """Industry name for `sic` per 'Siccodes48.txt', or 'Other' if unmatched."""
    return _ff_lookup(sic, 'Siccodes48.txt')

# ff49
def ff49(sic):
    """Industry name for `sic` per 'Siccodes49.txt', or 'Other' if unmatched."""
    return _ff_lookup(sic, 'Siccodes49.txt')

# Example:
# NOTE(review): this call sits in the middle of the cell, so its return value
# is computed but not displayed; only a cell's final expression is echoed.
ff5(4920)


# GM: 1999 paper by Grinblatt and Moskowitz
# Industry name -> list of [lower, upper] SIC-prefix intervals.
# Built from ordered pairs so the lookup order matches the listing order.
dict_GM = dict([
    ('Mining', [[10, 14]]),
    ('Food', [[20, 20]]),
    ('Apparel', [[22, 23]]),
    ('Paper', [[26, 26]]),
    ('Chemical', [[28, 28]]),
    ('Petroleum', [[29, 29]]),
    ('Construction', [[32, 32]]),
    ('Prim. Metals', [[33, 33]]),
    ('Fab. Metals', [[34, 34]]),
    ('Machinery', [[35, 35]]),
    ('Electrical Eq.', [[36, 36]]),
    ('Transport Eq.', [[37, 37]]),
    ('Manufacturing', [[38, 39]]),
    ('Railroads', [[40, 40]]),
    ('Other Transport.', [[41, 47]]),
    ('Utilities', [[49, 49]]),
    ('Dept. Stores', [[53, 53]]),
    # Retail is the only entry with two separate intervals.
    ('Retail', [[50, 52], [54, 59]]),
    ('Financial', [[60, 69]]),
])

# Function for GMIndustry
def GMindustry(sic):
    """Return the first dict_GM industry with an interval containing `sic`.

    Industries are checked in dict_GM's order and interval bounds are
    inclusive; 'Other' is returned when nothing matches.
    """
    for industry, intervals in dict_GM.items():
        if any(sic >= interval[0] and sic <= interval[1] for interval in intervals):
            return industry
    return 'Other'

# Example
# GMindustry(49)

In [2]:
# Parse 'Siccodes49.txt' once and keep the table in a global for later cells.
dict_ff49 = sic2industry('Siccodes49.txt')
# Bare final expression: the notebook echoes the entire mapping as cell output.
dict_ff49

{'Agriculture': [[100, 199], [200, 299], [700, 799], [910, 919], [2048, 2048]],
 'Food Products': [[2000, 2009],
  [2010, 2019],
  [2020, 2029],
  [2030, 2039],
  [2040, 2046],
  [2050, 2059],
  [2060, 2063],
  [2070, 2079],
  [2090, 2092],
  [2095, 2095],
  [2098, 2099]],
 'Candy & Soda': [[2064, 2068],
  [2086, 2086],
  [2087, 2087],
  [2096, 2096],
  [2097, 2097]],
 'Beer & Liquor': [[2080, 2080],
  [2082, 2082],
  [2083, 2083],
  [2084, 2084],
  [2085, 2085]],
 'Tobacco Products': [[2100, 2199]],
 'Recreation': [[920, 999],
  [3650, 3651],
  [3652, 3652],
  [3732, 3732],
  [3930, 3931],
  [3940, 3949]],
 'Entertainment': [[7800, 7829],
  [7830, 7833],
  [7840, 7841],
  [7900, 7900],
  [7910, 7911],
  [7920, 7929],
  [7930, 7933],
  [7940, 7949],
  [7980, 7980],
  [7990, 7999]],
 'Printing and Publishing': [[2700, 2709],
  [2710, 2719],
  [2720, 2729],
  [2730, 2739],
  [2740, 2749],
  [2770, 2771],
  [2780, 2789],
  [2790, 2799]],
 'Consumer Goods': [[2047, 2047],
  [2391, 2392],
 

In [4]:
import os

import pandas as pd

# Flatten the mapping once into (industry, lower, upper) rows, one per SIC
# interval, so an industry with several intervals spans several rows.
rows = [(x, a, b) for x in dict_ff49 for [a, b] in dict_ff49[x]]

# Industry names form the (unnamed) index; the bounds become integer columns.
df = pd.DataFrame(
    {
        "lower": [lower for (industry, lower, upper) in rows],
        "upper": [upper for (industry, lower, upper) in rows],
    },
    index=[industry for (industry, lower, upper) in rows],
)

# to_excel does not create missing directories, so make sure the target exists.
os.makedirs("docs", exist_ok=True)
df.to_excel("docs/ff49.xlsx")

In [24]:
# Select the rows whose inclusive [lower, upper] interval contains SIC 5148
# and show their index labels (the industry names).
# NOTE(review): df holds one row per interval, so the resulting Index can hold
# zero, one, or several labels.
df[(df.lower<=5148) & (df.upper>=5148)].index

Index(['Whlsl'], dtype='object')

In [12]:
# Echo the full parsed table as cell output.
# NOTE(review): the saved output for this cell is keyed by abbreviations
# ('Agric', 'Food', ...), whereas sic2industry as written joins the tokens
# after the abbreviation into the long name. The output appears to come from
# a different version of the parser - re-run from a fresh kernel to confirm.
dict_ff49

{'Agric': [[100, 199], [200, 299], [700, 799], [910, 919], [2048, 2048]],
 'Food': [[2000, 2009],
  [2010, 2019],
  [2020, 2029],
  [2030, 2039],
  [2040, 2046],
  [2050, 2059],
  [2060, 2063],
  [2070, 2079],
  [2090, 2092],
  [2095, 2095],
  [2098, 2099]],
 'Soda': [[2064, 2068],
  [2086, 2086],
  [2087, 2087],
  [2096, 2096],
  [2097, 2097]],
 'Beer': [[2080, 2080],
  [2082, 2082],
  [2083, 2083],
  [2084, 2084],
  [2085, 2085]],
 'Smoke': [[2100, 2199]],
 'Toys': [[920, 999],
  [3650, 3651],
  [3652, 3652],
  [3732, 3732],
  [3930, 3931],
  [3940, 3949]],
 'Fun': [[7800, 7829],
  [7830, 7833],
  [7840, 7841],
  [7900, 7900],
  [7910, 7911],
  [7920, 7929],
  [7930, 7933],
  [7940, 7949],
  [7980, 7980],
  [7990, 7999]],
 'Books': [[2700, 2709],
  [2710, 2719],
  [2720, 2729],
  [2730, 2739],
  [2740, 2749],
  [2770, 2771],
  [2780, 2789],
  [2790, 2799]],
 'Hshld': [[2047, 2047],
  [2391, 2392],
  [2510, 2519],
  [2590, 2599],
  [2840, 2843],
  [2844, 2844],
  [3160, 3161],
  [3170

In [30]:
# Print the long industry name from every header line of the definitions file.
txt = "Siccodes49.txt"
# The context manager closes the file even if reading fails.
with open(txt, 'r', encoding='utf-8') as f:
    lines = [l.strip() for l in f.readlines()]


for line in lines:
    first = line.split(' ')[0]

    # A first token shorter than 3 characters marks a header line; the name is
    # everything after the first two tokens, joined with single spaces.
    if len(line) > 0 and len(first) < 3:
        name = line.split()[2:]
        print(' '.join(name))

Agriculture
Food Products
Candy & Soda
Beer & Liquor
Tobacco Products
Recreation
Entertainment
Printing and Publishing
Consumer Goods
Apparel
Healthcare
Medical Equipment
Pharmaceutical Products
Chemicals
Rubber and Plastic Products
Textiles
Construction Materials
Construction
Steel Works Etc
Fabricated Products
Machinery
Electrical Equipment
Automobiles and Trucks
Aircraft
Shipbuilding, Railroad Equipment
Defense
Precious Metals
Non-Metallic and Industrial Metal Mining
Coal
Petroleum and Natural Gas
Utilities
Communication
Personal Services
Business Services
Computers
Computer Software
Electronic Equipment
Measuring and Control Equipment
Business Supplies
Shipping Containers
Transportation
Wholesale
Retail
Restaurants, Hotels, Motels
Banking
Insurance
Real Estate
Trading
Almost Nothing
