In [15]:
import pandas as pd
import numpy as np
from datetime import date, datetime, timedelta
from glob import glob
import plotly.express as px
import plotly.graph_objects as go

In [2]:
def crosses():
    """Load every crosses CSV under ./data/crosses and return one combined table.

    All files matching ``./data/crosses/*BananaData.csv`` are read and
    concatenated with a fresh integer index. The five date columns are parsed
    as UTC timestamps and reduced to plain ``datetime.date`` values, the count
    columns are converted with ``pd.to_numeric``, and ``Year``/``Month``/``Day``
    columns are derived from ``First_Pollination_Date``.

    Returns
    -------
    pandas.DataFrame
        The combined, type-converted table.
    """
    date_columns = (
        'First_Pollination_Date',
        "Bunch_Harvest_Date",
        "Seed_Extraction_Date",
        "Embryo_Rescue_Date",
        "Germination_Date",
    )
    numeric_columns = [
        'Total_Seeds',
        'Good_Seeds',
        'Number_of_Embryo_Rescued',
        'Number_of_Embryo_Germinating',
        'Cycle',
    ]

    frames = []
    for path in glob("./data/crosses/*BananaData.csv"):
        frames.append(pd.read_csv(path))
    df = pd.concat(frames, ignore_index=True)

    # Parse as tz-aware UTC timestamps, then keep only the calendar date.
    for column in date_columns:
        df[column] = pd.to_datetime(df[column], utc=True).dt.date

    df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric)

    # Calendar parts of the first pollination date, added as the last columns.
    pollination = pd.DatetimeIndex(df['First_Pollination_Date'])
    df['Year'] = pollination.year
    df['Month'] = pollination.month
    df['Day'] = pollination.day
    return df

In [6]:
# Load the combined crosses table here so this cell runs on a fresh kernel:
# no cell in the notebook defines `dt` before it is used.
# NOTE(review): assumes `dt` was the unfiltered result of crosses() — confirm.
dt = crosses()
# Number of rows for each (Location, Year) pair.
crosses_count = dt.pivot_table(index=['Location','Year'], aggfunc='size')

Location  Year  
Arusha    2018.0    238
          2019.0    499
          2020.0    645
          2021.0     93
Kawanda   2020.0    323
          2021.0     64
Sendusu   2018.0    192
          2019.0    729
          2020.0    684
          2021.0    128
dtype: int64

In [7]:
# Flatten the (Location, Year) -> count result into a plain table whose count
# column is called 'N'.
flat_counts = pd.DataFrame(crosses_count)
flat_counts.columns = ['N']
flat_counts = flat_counts.reset_index()
# Second column after the reset is the year level; store it as a category.
col = flat_counts.columns[1]
flat_counts[col] = flat_counts[col].astype("category")
crosses_count = flat_counts

In [23]:
# Display the flattened counts table (one row per Location/Year pair).
crosses_count

Unnamed: 0,Location,Year,N
0,Arusha,2018.0,238
1,Arusha,2019.0,499
2,Arusha,2020.0,645
3,Arusha,2021.0,93
4,Kawanda,2020.0,323
5,Kawanda,2021.0,64
6,Sendusu,2018.0,192
7,Sendusu,2019.0,729
8,Sendusu,2020.0,684
9,Sendusu,2021.0,128


In [148]:
def bar_plot(df):
    """Build a grouped bar chart of cross counts per year, one trace per location.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns 'Location', 'Year' and 'N'.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with one ``go.Bar`` trace per distinct 'Location' value.
    """
    fig = go.Figure()
    colors = px.colors.qualitative.G10
    # Take each trace's name from the groupby key itself. groupby yields its
    # keys sorted while unique() keeps first-appearance order, so pairing the
    # two by position mislabels traces whenever df is not sorted by Location.
    for i, (location, group) in enumerate(df.groupby('Location')):
        fig.add_trace(go.Bar(x=group['Year'],
                             y=group['N'],
                             name=location,
                             # Cycle through the palette so having more
                             # locations than colours cannot raise IndexError.
                             marker_color=colors[i % len(colors)]
                             ))
    fig.update_layout(
        title='Total Number of Crosses',
        xaxis_tickfont_size=14,
        yaxis=dict(
            # `title.font` replaces the deprecated `titlefont_size` shorthand,
            # which newer Plotly releases no longer accept.
            title=dict(text='Number', font=dict(size=16)),
            tickfont_size=14,
        ),
        legend=dict(
            x=0,
            y=1.0,
            bgcolor='rgba(255, 255, 255, 0)',
            bordercolor='rgba(255, 255, 255, 0)'
        ),
        barmode='group',
        bargap=0.15, # gap between bars of adjacent location coordinates.
        bargroupgap=0.1 # gap between bars of the same location coordinate.
    )
    return fig

# Draw the grouped bar chart for the counts table; the returned figure is the cell output.
bar_plot(crosses_count)    

In [67]:
# Preview the first rows of the counts table.
crosses_count.head()

Unnamed: 0,Location,Year,N
0,Arusha,2018.0,238
1,Arusha,2019.0,499
2,Arusha,2020.0,645
3,Arusha,2021.0,93
4,Kawanda,2020.0,323


In [64]:
# Split the counts table into one DataFrame per Location.
# Note: this rebinds the notebook-level name `dt` to a list.
dt = [group for _key, group in crosses_count.groupby('Location')]
# Positional lookup of the first group's location name; a label lookup
# (`[0]`) only works while that group happens to contain index label 0.
dt[0]['Location'].iloc[0]

'Arusha'

In [47]:
# Group the counts table by the values of its Location column; the bare
# `grouped` below shows only the GroupBy object's repr, not its contents.
grouped = crosses_count.groupby(crosses_count.Location)
grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000019006C66A00>

In [48]:
# Map each Location value to the sub-table holding that location's rows.
dict_of_regions = {
    location: rows
    for location, rows in crosses_count.groupby('Location')
}

In [53]:
# Number of distinct Location keys collected in the dict.
len(dict_of_regions)

3

In [32]:
# Make this cell self-contained: `fig` and `years` are otherwise only defined
# by a later cell, and that `years` has 3 labels while this trace has 18 values.
# NOTE(review): 1995-2012 is presumed from the Plotly bar-chart tutorial this
# example appears to come from — confirm.
years = list(range(1995, 2013))
fig = go.Figure()
fig.add_trace(go.Bar(x=years,
                y=[16, 13, 10, 11, 28, 37, 43, 55, 56, 88, 105, 156, 270,
                   299, 340, 403, 549, 499],
                name='China',
                marker_color='rgb(26, 118, 255)'
                ))

In [33]:
# Style the current figure as a grouped bar chart and render it.
fig.update_layout(
    title='US Export of Plastic Scrap',
    xaxis_tickfont_size=14,
    yaxis=dict(
        # `title.font` replaces the deprecated `titlefont_size` shorthand,
        # which newer Plotly releases no longer accept.
        title=dict(text='USD (millions)', font=dict(size=16)),
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

In [34]:
# x-axis labels and a new, empty figure; this rebinds the notebook-level
# names `years` and `fig`.
years = ['2016','2017','2018']
fig = go.Figure()

In [35]:
# Expenses series: each bar starts at its negative `base` value.
fig.add_trace(
    go.Bar(
        name='expenses',
        x=years,
        y=[500, 600, 700],
        base=[-500,-600,-700],
        marker_color='crimson',
    )
)


In [None]:
# Revenue series: bars start at a base of 0.
fig.add_trace(
    go.Bar(
        name='revenue',
        x=years,
        y=[300, 400, 700],
        base=0,
        marker_color='lightslategrey',
    )
)

# Render the figure with every trace added so far.
fig.show()

In [50]:
# Small example table: a chromosome label and a position on each row.
chromosome_labels = ["chr3", "chr3", "chr7", "chr6", "chr1"]
positions = [10, 20, 30, 40, 50]
DF = pd.DataFrame({'chr': chromosome_labels, 'pos': positions})
DF

Unnamed: 0,chr,pos
0,chr3,10
1,chr3,20
2,chr7,30
3,chr6,40
4,chr1,50


In [52]:
# One DataFrame per distinct `chr` value of the example table.
ans = [
    pd.DataFrame(rows)
    for chrom, rows in DF.groupby('chr', as_index=False)
]


Unnamed: 0,chr,pos
4,chr1,50
