In [1]:
import pandas as pd
import numpy as np
import pprint
import json
import math

In [2]:
# Load the classification export. Each row is one classification; its
# 'annotations' column holds a JSON string (decoded with json.loads below)
# describing the marks drawn on one image.
pod_df=pd.read_csv('../Inputs/edamame_shoot_architecture_images_2020_classifications.csv')
pod_df['annotations']

0      [{"task":"T0","task_label":"label for images f...
1      [{"task":"T0","task_label":"label for images f...
2      [{"task":"T0","task_label":"label for images f...
3      [{"task":"T0","task_label":"label for images f...
4      [{"task":"T0","task_label":"label for images f...
                             ...                        
447    [{"task":"T0","task_label":"Label","value":[{"...
448    [{"task":"T0","task_label":"Label","value":[{"...
449    [{"task":"T0","task_label":"Label","value":[{"...
450    [{"task":"T0","task_label":"Label","value":[{"...
451    [{"task":"T0","task_label":"label for images f...
Name: annotations, Length: 452, dtype: object

In [3]:
pod_annotations = pod_df['annotations'] # JSON-encoded annotation strings, one per classification row
pod_single = json.loads(pod_annotations[0])[0]# first task entry of the row with index label 0 (the first row, not the last)
print(pod_single)
value_single = pod_single['value']# list of individual marks for that task, without the task/task_label header fields
print(value_single)

{'task': 'T0', 'task_label': 'label for images from 1-100', 'value': [{'x': 307.3125, 'y': 304.75, 'tool': 3, 'frame': 0, 'details': [], 'tool_label': 'First Pod Height'}, {'x1': 91.3125, 'x2': 155.3125, 'y1': 384.75, 'y2': 39.75, 'tool': 4, 'frame': 0, 'details': [], 'tool_label': 'Length of Vertical Ruler'}, {'x': 291.3125, 'y': 23.75, 'tool': 0, 'frame': 0, 'details': [], 'tool_label': 'Top'}, {'x': 304.3125, 'y': 401.75, 'tool': 1, 'frame': 0, 'details': [], 'tool_label': 'Bottom '}, {'x': 310.3125, 'y': 362.75, 'tool': 5, 'frame': 0, 'details': [], 'tool_label': 'Branching Points'}, {'x': 299.3125, 'y': 325.75, 'tool': 5, 'frame': 0, 'details': [], 'tool_label': 'Branching Points'}, {'x1': 305.3125, 'x2': 298.3125, 'y1': 399.75, 'y2': 164.75, 'tool': 2, 'frame': 0, 'details': [], 'tool_label': 'Plant Length'}, {'x': 304.3125, 'y': 223.75, 'tool': 5, 'frame': 0, 'details': [], 'tool_label': 'Branching Points'}, {'x': 296.3125, 'y': 160.75, 'tool': 5, 'frame': 0, 'details': [], 'too

In [4]:
def distance(point1,point2):
  """Return the Euclidean distance between two 2-D points.

  Each argument must unpack into exactly two coordinates (x, y); the result
  is in the same units as the inputs (pixels for raw annotation marks).
  """
  (ax, ay), (bx, by) = point1, point2
  dx = ax - bx
  dy = ay - by
  return (dx**2 + dy**2)**0.5

#parse from dict and get top, bottom, branching points and ruler as a tuple
def parseData(value):
  """Group the marks of one annotation task by the tool that drew them.

  Parameters
  ----------
  value : list of dict
    One dict per mark. Each dict has a 'tool_label' plus either point
    coordinates ('x', 'y') or line coordinates ('x1', 'y1', 'x2', 'y2').

  Returns
  -------
  dict
    'top', 'bottom', 'first_pod' : (x, y) tuple, or () when the mark is absent
      (if a label occurs more than once, the last occurrence wins).
    'ruler' : ((x1, y1), (x2, y2)) tuple, or () when absent.
    'branching_points' : list of (x, y) tuples, in annotation order.
    'plant_length_points' : list of [(x1, y1), (x2, y2)] segments.
  """
  data = dict()
  data['top'] = ()
  data['bottom'] = ()
  data['branching_points'] = []
  data['ruler'] = ()
  data['plant_length_points'] = []
  # Always present so callers can test len(data['first_pod']) without a
  # KeyError when an image has no First Pod Height mark.
  data['first_pod'] = ()
  for args in value:
    # Labels in the export can carry stray whitespace (e.g. 'Bottom '), so
    # every comparison is made on the stripped label.
    label = args['tool_label'].strip()
    if label == 'Top':
      data['top'] = (args['x'], args['y'])
    elif label == 'Bottom':
      data['bottom'] = (args['x'], args['y'])
    elif label == 'Branching Points':
      # A plant has many branching points, so they are collected in a list.
      data['branching_points'].append((args['x'], args['y']))
    elif label == 'Length of Vertical Ruler':
      # The two end points of the ruler line.
      data['ruler'] = ((args['x1'], args['y1']), (args['x2'], args['y2']))
    elif label == 'First Pod Height':
      data['first_pod'] = (args['x'], args['y'])
    elif label == 'Plant Length':
      # The stem may be traced as several straight segments.
      data['plant_length_points'].append([(args['x1'], args['y1']), (args['x2'], args['y2'])])
  return data

In [5]:
# Sanity check: group the sample image's marks by tool label.
parseData(value_single)


{'top': (291.3125, 23.75),
 'bottom': (304.3125, 401.75),
 'branching_points': [(310.3125, 362.75),
  (299.3125, 325.75),
  (304.3125, 223.75),
  (296.3125, 160.75)],
 'ruler': ((91.3125, 384.75), (155.3125, 39.75)),
 'plant_length_points': [[(305.3125, 399.75), (298.3125, 164.75)],
  [(292.3125, 163.75), (268.3125, 86.75)],
  [(263.3125, 81.75), (290.3125, 25.75)]],
 'first_pod': (307.3125, 304.75)}

In [6]:
def calculateHeight(single_data, ruler_length=91.5):
  """Compute shoot-architecture measurements (in cm) from one image's marks.

  Parameters
  ----------
  single_data : list of dict
    The marks of a single annotation task, in the form accepted by parseData.
  ruler_length : float, optional
    Real-world length of the ruler marked in the image, in cm. It sets the
    pixel-to-cm scale for every measurement. Defaults to 91.5.

  Returns
  -------
  tuple
    (plant_height, first_branch_length, branch_count, plant_length,
    first_pod_height, average_internode), lengths in cm.
    Returns (0, 0, 0, 0, 0, 0) when the ruler, top or bottom mark is missing,
    when the ruler mark has zero length, or when a branching point is
    malformed. first_pod_height is -1 when there is no first-pod mark.
  """
  invalid = (0, 0, 0, 0, 0, 0)
  data = parseData(single_data)

  # Ruler, top and bottom are all required: without them there is no scale
  # and no plant extent to measure.
  if len(data['ruler']) != 2 or len(data['bottom']) != 2 or len(data['top']) != 2:
    return invalid

  ruler_pixels = distance(data['ruler'][0], data['ruler'][1])
  if ruler_pixels == 0:
    # A degenerate ruler mark gives no usable scale (and would divide by zero).
    return invalid
  cm_per_pixel = ruler_length / ruler_pixels

  # Pair every branching point with its distance (cm) from the plant bottom,
  # then order the pairs from the bottom upwards.
  branch_count = len(data['branching_points'])
  branching_points_with_dist = []
  for branch in data['branching_points']:
    if len(branch) != 2:
      return invalid
    dist = distance(data['bottom'], branch) * cm_per_pixel
    branching_points_with_dist.append((dist, branch))
  branching_points_with_dist.sort()

  # Plant height: straight-line distance between the top and bottom marks.
  plant_height_in_pixel = distance(data['top'], data['bottom'])
  plant_height_in_cm = cm_per_pixel * plant_height_in_pixel

  # Plant length: sum of all traced stem segments; malformed segments are skipped.
  plant_length = 0
  for segment in data['plant_length_points']:
    if len(segment) == 2:
      plant_length += distance(segment[0], segment[1])
  plant_length_in_cm = cm_per_pixel * plant_length

  # First pod height uses only the vertical (y) offset from the bottom mark.
  # .get keeps this safe even if the parsed data has no 'first_pod' entry.
  first_pod = data.get('first_pod', ())
  if len(first_pod) == 2:
    first_pod_length = abs(data['bottom'][1] - first_pod[1]) * cm_per_pixel
  else:
    first_pod_length = -1

  # Total length of the gaps between consecutive branching points.
  internode_total = 0
  for i in range(branch_count - 1):
    lower = branching_points_with_dist[i][1]
    upper = branching_points_with_dist[i + 1][1]
    internode_total += distance(lower, upper) * cm_per_pixel

  if branch_count == 0:
    # Unbranched plant.
    first_branch_length = 0
    average_internode = 0
  else:
    # Distance from the bottom to the lowest branching point (first internode).
    first_branch_length = branching_points_with_dist[0][0]
    # NOTE(review): the sum above covers branch_count - 1 gaps but is divided
    # by branch_count. Kept unchanged to preserve existing results; confirm
    # the intended definition of the average.
    average_internode = internode_total / branch_count
  return (plant_height_in_cm, first_branch_length, branch_count, plant_length_in_cm, first_pod_length, average_internode)
  
# Demo on the sample image: the tuple printed below follows the order named
# in the message (FIL = first branch length measured from the plant bottom).
print("calculated Values for this data point viz. (Plant_Ht, FIL, # of Branches,Plant Length, First pod height, Average internode)")
print(calculateHeight(value_single))

calculated Values for this data point viz. (Plant_Ht, FIL, # of Branches,Plant Length, First pod height, Average internode)
(98.62874612185593, 10.28961992563936, 4, 98.55141724065878, 25.29453856579815, 13.314112779382786)


In [7]:
def heightCalculator(single_row,type='all'):
  """Compute the measurements for one classification row.

  Parameters
  ----------
  single_row : mapping
    A row with an 'annotations' entry holding a JSON string; only the first
    task entry of that JSON list is used.
  type : str, optional
    Which measurement to return: 'plant_height', 'first_branch_height',
    'branch_count', 'plant_length', 'first_pod_height' or
    'average_internode_length'. Any other value (default 'all') returns the
    full 6-tuple in that order.

  Returns
  -------
  tuple or number
    The selected measurement, or the whole tuple. Every field is -1 when the
    first task entry is not task 'T0'.
  """
  first_task = json.loads(single_row['annotations'])[0]
  if first_task['task'] != 'T0':
    measurements = (-1,-1,-1,-1,-1,-1)
  else:
    measurements = calculateHeight(first_task['value'])

  # Position of each named measurement inside the tuple.
  field_order = (
    'plant_height',
    'first_branch_height',
    'branch_count',
    'plant_length',
    'first_pod_height',
    'average_internode_length',
  )
  for position, field in enumerate(field_order):
    if type == field:
      return measurements[position]
  return measurements

In [8]:
def parseName(single_row):
  """Return the image filename recorded in a row's 'subject_data' JSON.

  'subject_data' decodes to a dict keyed by subject id; the 'Filename' field
  of the first subject entry is returned.
  """
  subject = json.loads(single_row['subject_data'])
  subject_ids = list(subject.keys())
  first_subject = subject[subject_ids[0]]
  return first_subject['Filename']

In [9]:
# Run the measurement pipeline once per classification row. Each element of
# `params` is the 6-tuple returned by heightCalculator, so the JSON parsing
# and geometry are not repeated for every output column.
params = pod_df.apply(heightCalculator,axis=1)

# Column names, in the same order as the fields of the tuple.
measurement_columns = [
  'plant_height',
  'first_branch_height',
  'branch_count',
  'plant_length',
  'first_pod_height',
  'average_internode_length',
]

# Expand the tuples into one column per measurement, keeping pod_df's index.
output = pd.DataFrame(params.tolist(), columns=measurement_columns, index=pod_df.index)
output.insert(0, 'Filename', pod_df.apply(parseName,axis=1))

output.head()

Unnamed: 0,Filename,plant_height,first_branch_height,branch_count,plant_length,first_pod_height,average_internode_length
0,IMG_0776.jpg,98.628746,10.28962,4,98.551417,25.294539,13.314113
1,IMG_1066.jpg,116.458769,9.30127,5,116.016184,15.845817,5.059101
2,IMG_0850.jpg,90.576032,6.861124,8,85.62966,7.644391,4.079359
3,IMG_1008.jpg,106.427281,18.684691,5,100.703973,15.333792,6.690553
4,IMG_1056.jpg,97.045513,12.48702,4,89.864338,11.618995,2.993315


In [10]:
import matplotlib.pyplot as plt

In [11]:
# One output row per classification row; 7 columns = Filename + six measurements.
output.shape

(452, 7)

In [12]:
# Count missing values per column. Failed or skipped measurements are encoded
# as 0 or -1 sentinels by calculateHeight/heightCalculator rather than NaN,
# so they do not show up in this count.
output.isna().sum()

Filename                    0
plant_height                0
first_branch_height         0
branch_count                0
plant_length                0
first_pod_height            0
average_internode_length    0
dtype: int64

In [13]:
# Save the results. to_csv writes the DataFrame index by default, so the file
# gets an unnamed first column holding the row index.
output.to_csv("../Outputs/Edamame_shoot_architecture_output_data_2020.csv")