In [1]:
import pandas as pd
import numpy as np
import csv

In [2]:
# Load the strain dataset; later cells read its name, type, thc, energy,
# parent1 and parent2 columns.
strains = pd.read_csv('strains.csv')

In [3]:
# Preview the first three rows to sanity-check the columns that were loaded.
strains.head(3)

Unnamed: 0.1,Unnamed: 0,name,type,rating,reviews,thc,growth_difficulty,plant_height,yield_oz_per_ft_sqrd,flowering_weeks,...,fibromyalgia,epilepsy,anorexia,spasticity,dry_mouth,dry_eyes,paranoid,dizzy,anxious,headache
0,1,Blue Dream,Hybrid,4.3,13203.0,19.0,Moderate,> 78,3 - 6,10 - 12,...,,,,,31.0,16.0,7.0,7.0,4.0,
1,2,Gelato,Hybrid,4.7,1555.0,17.0,,,,,...,,,,,24.0,11.0,4.0,4.0,4.0,
2,3,Sour Diesel,Sativa,4.3,7633.0,19.0,Moderate,> 78,1 - 3,10 - 12,...,,,,,35.0,19.0,10.0,8.0,,5.0


Network files for Gephi

In [4]:
# nodes file: one row per strain. The row index of `strains` becomes the node
# `id` and the strain name becomes the node `label`.
nodes = (
    strains.loc[:, ['name', 'type', 'thc', 'energy']]
    .reset_index()
    .rename(columns={'index': 'id', 'name': 'label'})
)
nodes.to_csv('nodes.csv', index=False)

In [5]:
# edges file: one "source,target" row per child -> parent link.
# source is the child's row index in `strains` (which is also its node id),
# target is the id of the parent as listed in nodes.csv.
nodes = pd.read_csv('nodes.csv')

# Build the label -> id lookup once instead of filtering `nodes` for every row.
# keep='first' reproduces the previous `.iloc[0]` choice when a label repeats.
label_to_id = (
    nodes.dropna(subset=['label'])
    .drop_duplicates(subset='label', keep='first')
    .set_index('label')['id']
    .to_dict()
)

# `with` guarantees the file is flushed and closed even if a row raises.
with open('edges.csv', 'w') as f:
    f.write('source,target\n')
    for index, row in strains.iterrows():
        # parent1 is written before parent2, same order as before
        for parent_col in ('parent1', 'parent2'):
            parent = row[parent_col]
            # skip missing parents and parents that are not themselves nodes
            if pd.isnull(parent) or parent not in label_to_id:
                continue
            # source=id=index, target=id of parent
            f.write('{},{}\n'.format(index, label_to_id[parent]))

Top 10 strain files for Tableau

Each file includes the top strain itself and all of its children.

In [6]:
# Top strains were identified in the Gephi network visualization (see the project's GitHub).
top = ['OG Kush', 'Afghani', 'Skunk No. 1', 'Blueberry', 'GSC', 'Sour Diesel', 'White Widow', 'Blue Dream', 'Haze', 'Jack Herer']

# Writes one CSV per top strain: a 'Parent' row for the strain itself followed
# by one 'Child' row for every strain that lists it as parent1 or parent2.
for strain in top:
    # Look the strain up before opening the file, so a strain that is missing
    # from `nodes` fails here without leaving a header-only CSV behind.
    s = nodes[nodes.label == strain]
    parent_line = '{},{},{},{},{},{}\n'.format(strain, strain, s.type.iloc[0], 'Parent', s.thc.iloc[0], s.energy.iloc[0])

    # children: all strains that have the given strain as a parent
    children = strains.loc[(strains.parent1 == strain) | (strains.parent2 == strain)].reset_index(drop=True)

    # `with` closes the file even if writing a row raises.
    with open('{}.csv'.format(strain.replace(" ", "_")), 'w') as f:
        f.write('label,name,type,shape,thc,energy\n')
        f.write(parent_line)
        for index, row in children.iterrows():
            # children are labeled 1..n in the order they appear in `strains`
            f.write('{},{},{},{},{},{}\n'.format(index+1, row['name'], row['type'], 'Child', row['thc'], row['energy']))

(Optional) Also adds breeding partners: each child's other parent is written alongside the child

In [None]:
# Top strains were identified in the Gephi network visualization (see the project's GitHub).
top = ['OG Kush', 'Afghani', 'Skunk No. 1', 'Blueberry', 'GSC', 'Sour Diesel', 'White Widow', 'Blue Dream', 'Haze', 'Jack Herer']

# Writes one CSV per top strain containing the strain, each of its children,
# and each child's other parent (the "mate"). In the `shape` column parents
# and mates are written as 1 and children as 2.
for strain in top:
    # Look the strain up before opening the file, so a strain that is missing
    # from `nodes` fails here without leaving a header-only CSV behind.
    s = nodes[nodes.label == strain]
    parent_line = '{},{},{},{},{},{}\n'.format(strain, strain, s.type.iloc[0], 1, s.thc.iloc[0], s.energy.iloc[0])

    # family: all strains that have the given strain as a parent
    family = strains.loc[(strains.parent1 == strain) | (strains.parent2 == strain)].reset_index(drop=True)

    # `with` closes the file even if writing a row raises.
    with open('{}+breeding_partners.csv'.format(strain.replace(" ", "_")), 'w') as f:
        f.write('label,name,type,shape,thc,energy\n')
        f.write(parent_line)
        for index, row in family.iterrows():
            # labeled the same number: child and mate, aka child's other parent
            # add child
            f.write('{},{},{},{},{},{}\n'.format(index+1, row['name'], row['type'], 2, row['thc'], row['energy']))
            # add mate(s): any recorded parent other than the top strain itself
            for parent_col in ('parent1', 'parent2'):
                partner = row[parent_col]
                if pd.isnull(partner) or partner == strain:
                    continue
                mate = nodes[nodes.label == partner]
                # A parent that is not itself a strain in `nodes` has no
                # type/thc/energy to report; skip it instead of raising
                # IndexError on .iloc[0]. The edges file skips unknown parents too.
                if mate.empty:
                    continue
                f.write('{},{},{},{},{},{}\n'.format(index+1, mate.label.iloc[0], mate.type.iloc[0], 1, mate.thc.iloc[0], mate.energy.iloc[0]))