In [22]:
import plotly.express as px
from mitosheet.public.v3 import *; # Analysis Name:id-dnjjcfibvx;
import pandas as pd

# Location of warren_buffett_portfolio.csv. The URL path contains a full commit
# hash, so every pd.read_csv(url_to_data) in this notebook reads the same
# revision of the file.
url_to_data = 'https://raw.githubusercontent.com/mito-ds/mito/3ccd31ca659853cc798d2a76eaca9b6572f8cc44/evals/test_cases/agent_find_and_update_tests/notebooks/warren_buffett_portfolio.csv'


In [34]:
# Load warren_buffett_portfolio.csv and convert its Date column to datetime in
# one chain. Values that do not match YYYY-MM-DD become NaT (errors='coerce')
# instead of raising.
warren_buffett_portfolio = pd.read_csv(url_to_data).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y-%m-%d', errors='coerce')
)

# Analysis

In [24]:
# Pivot warren_buffett_portfolio into warren_buffett_portfolio_pivot:
# one row per Industry with the mean and median of Num_Employees.
tmp_df = warren_buffett_portfolio.loc[:, ['Industry', 'Num_Employees']].copy()
pivot_table = pd.pivot_table(
    tmp_df,
    index=['Industry'],
    values=['Num_Employees'],
    aggfunc={'Num_Employees': ['mean', 'median']},
)
# Replace each pivot column header with its flattened form via mitosheet's
# flatten_column_header (the chart cell below reads 'Num_Employees mean').
pivot_table.columns = [flatten_column_header(header) for header in pivot_table.columns]
# Turn the Industry index back into a regular column.
warren_buffett_portfolio_pivot = pivot_table.reset_index()


In [25]:
# Bar chart of the mean number of employees per industry.
# Plotly Express reference: https://plotly.com/python/plotly-express/
fig = px.bar(warren_buffett_portfolio_pivot, x='Industry', y='Num_Employees mean')
fig.update_layout(
    title='Industry, Num_Employees mean bar chart',
    # Grid lines plus a thin range slider under the x axis.
    xaxis=dict(
        showgrid=True,
        rangeslider=dict(visible=True, thickness=0.05),
    ),
    yaxis=dict(showgrid=True),
    legend=dict(orientation='v'),
    barmode='group',
    paper_bgcolor='#FFFFFF',
)
fig.show(renderer="iframe")

In [26]:
# Build a monthly snapshot of the portfolio on an independent deep copy:
#   1. add a 'YEAR-MONTH' key as the second column,
#   2. keep only the last row of every (YEAR-MONTH, Symbol) pair,
#   3. keep only rows dated after 2018-02-25.
# The date comparison needs 'Date' to be datetime, which the load cell does.
warren_buffett_portfolio_copy = warren_buffett_portfolio.copy(deep=True)
warren_buffett_portfolio_copy.insert(
    1,
    'YEAR-MONTH',
    CONCAT(
        YEAR(warren_buffett_portfolio_copy['Date']),
        "-",
        MONTH(ENDOFBUSINESSMONTH(warren_buffett_portfolio_copy['Date'])),
    ),
)
warren_buffett_portfolio_copy = (
    warren_buffett_portfolio_copy
    .drop_duplicates(subset=['YEAR-MONTH', 'Symbol'], keep='last')
    .loc[lambda frame: frame['Date'] > pd.to_datetime('2018-02-25')]
)

In [27]:
# Plot at most the first 1000 rows so the rendered chart does not crash the browser.
warren_buffett_portfolio_filtered = warren_buffett_portfolio_copy.iloc[:1000]

# One line per Symbol showing Close over Date.
fig = px.line(
    warren_buffett_portfolio_filtered,
    x='Date',
    y='Close',
    color='Symbol',
    line_shape='linear',
)
fig.update_layout(
    title='Date, Close (first 1000 rows) line',
    # Grid lines plus a thin range slider under the x axis.
    xaxis=dict(
        showgrid=True,
        rangeslider=dict(visible=True, thickness=0.05),
    ),
    yaxis=dict(showgrid=True),
    legend=dict(orientation='v'),
    paper_bgcolor='#FFFFFF',
)
fig.show(renderer="iframe")

In [28]:
# Read a fresh copy of warren_buffett_portfolio.csv for the day-over-day analysis.
warren_buffett_portfolio_daily_change = pd.read_csv(url_to_data)

# Add 'DoD Delta' as the sixth column: (Close - previous Close) / previous Close
# when the previous row has the same Symbol, otherwise 0.
# NOTE(review): "previous" means the preceding row of the file, so this presumably
# relies on the CSV being ordered by Symbol and then by date - confirm.
warren_buffett_portfolio_daily_change.insert(
    5,
    'DoD Delta',
    IF(
        warren_buffett_portfolio_daily_change['Symbol'] == warren_buffett_portfolio_daily_change['Symbol'].shift(1),
        (
            warren_buffett_portfolio_daily_change['Close']
            - warren_buffett_portfolio_daily_change['Close'].shift(1, fill_value=0)
        ) / warren_buffett_portfolio_daily_change['Close'].shift(1, fill_value=0),
        0,
    ),
)

# Drop the columns that are not needed for the ranking below.
warren_buffett_portfolio_daily_change = warren_buffett_portfolio_daily_change.drop(
    columns=['Volume', 'Adj Close', 'Name', 'Sector', 'Industry', 'Num_Employees', 'High', 'Low', 'Open']
)

# The 100 largest deltas (descending sort) and the 100 smallest (ascending sort);
# rows with a missing delta are placed last in both rankings.
top_100_dod_changes = (
    warren_buffett_portfolio_daily_change
    .sort_values(by='DoD Delta', ascending=False, na_position='last')
    .head(100)
)
worst_100_dod_changes = (
    warren_buffett_portfolio_daily_change
    .sort_values(by='DoD Delta', ascending=True, na_position='last')
    .head(100)
)


In [29]:
def _dod_delta_bar_chart(changes, title):
    """Build the styled Symbol vs. 'DoD Delta' bar chart for one dataframe.

    Args:
        changes: dataframe with 'Symbol' and 'DoD Delta' columns.
        title: chart title; passed in so each figure can be told apart.

    Returns:
        The configured plotly figure (not yet shown).
    """
    figure = px.bar(changes, x='Symbol', y='DoD Delta')
    figure.update_layout(
        title=title,
        xaxis={
            "showgrid": True,
            "rangeslider": {
                "visible": True,
                "thickness": 0.05
            }
        },
        yaxis={
            "showgrid": True
        },
        legend={
            "orientation": 'v'
        },
        barmode='group',
        paper_bgcolor='#FFFFFF'
    )
    return figure


# The two figures previously carried the identical title, so the rendered
# output gave no way to tell the best days from the worst days.
fig_one = _dod_delta_bar_chart(top_100_dod_changes, 'Symbol, DoD Delta bar chart (top 100 changes)')
fig_one.show(renderer="iframe")

fig_two = _dod_delta_bar_chart(worst_100_dod_changes, 'Symbol, DoD Delta bar chart (worst 100 changes)')
fig_two.show(renderer="iframe")

In [30]:
# Re-read the CSV and rebind warren_buffett_portfolio to the fresh frame.
# This replaces the frame whose 'Date' column was converted to datetime near the
# top of the notebook; no conversion is applied to this new frame here.
warren_buffett_portfolio = pd.read_csv(url_to_data)

In [31]:
from mitosheet.public.v3 import *; 
import pandas as pd

# Add column 'Market Cap' (Volume * Close) as the seventh column.
# DataFrame.insert raises ValueError when the column already exists, which made
# this cell fail when re-run without first re-running the reload cell above;
# the guard makes the cell safe to execute repeatedly.
# NOTE(review): Volume * Close looks like traded value per row rather than a
# shares-outstanding market cap - confirm the column name is intended.
if 'Market Cap' not in warren_buffett_portfolio.columns:
    warren_buffett_portfolio.insert(
        6,
        'Market Cap',
        warren_buffett_portfolio['Volume'] * warren_buffett_portfolio['Close'],
    )

# Sort by Market Cap ascending (missing values first) ...
warren_buffett_portfolio = warren_buffett_portfolio.sort_values(by='Market Cap', ascending=True, na_position='first')

# ... and keep only rows with a strictly positive Market Cap, which also
# removes the rows where it is missing.
warren_buffett_portfolio = warren_buffett_portfolio[warren_buffett_portfolio['Market Cap'] > 0]

# From independent deep copies of warren_buffett_portfolio, keep one row per Symbol:
# the first occurrence in one copy and the last occurrence in the other.
# Because the frame was sorted by Market Cap ascending earlier in this cell,
# these are each symbol's smallest and largest Market Cap rows.
warren_buffett_portfolio_copy = (
    warren_buffett_portfolio
    .copy(deep=True)
    .drop_duplicates(subset=['Symbol'], keep='first')
)
warren_buffett_portfolio_copy_copy = (
    warren_buffett_portfolio
    .copy(deep=True)
    .drop_duplicates(subset=['Symbol'], keep='last')
)

# Descriptive aliases; each one is the same object as the *_copy name it is bound from.
smallest_market_cap_day = warren_buffett_portfolio_copy
largest_market_cap_day = warren_buffett_portfolio_copy_copy

# Concatenate the two per-symbol frames into df_concat with a fresh 0..n-1 index.
df_concat = pd.concat([smallest_market_cap_day, largest_market_cap_day], join='inner', ignore_index=True)

# Sort by Symbol (descending) so both rows of a symbol sit next to each other,
# then drop the columns the comparison does not need. Both steps return new
# frames instead of mutating in place.
df_concat = (
    df_concat
    .sort_values(by='Symbol', ascending=False, na_position='last')
    .drop(columns=['Adj Close', 'Num_Employees', 'Name', 'High', 'Low', 'Open', 'Close', 'Volume'])
)

# Create 'Change in Market Cap' directly as the third column. This is where the
# former insert-at-3 / reorder-to-2 / rename sequence on a placeholder column
# ended up.
df_concat.insert(2, 'Change in Market Cap', 0)

# Fill it with (max - min) / min of 'Market Cap' over a RollingRange of size 2
# with offset -1, but only on rows whose Symbol equals the previous row's
# Symbol; every other row gets 0.
# NOTE(review): RollingRange(..., 2, -1) presumably spans the previous and the
# current row - confirm against the mitosheet documentation.
df_concat['Change in Market Cap'] = IF(
    df_concat['Symbol'] == df_concat['Symbol'].shift(1),
    (
        MAX(RollingRange(df_concat[['Market Cap']], 2, -1))
        - MIN(RollingRange(df_concat[['Market Cap']], 2, -1))
    ) / MIN(RollingRange(df_concat[['Market Cap']], 2, -1)),
    0,
)

# Keep only rows with a non-zero change ...
df_concat = df_concat[df_concat['Change in Market Cap'] != 0]

# ... ordered from the largest change to the smallest.
df_concat = df_concat.sort_values(by='Change in Market Cap', ascending=False, na_position='last')

# Pivot df_concat into average_change_in_market_cap_by_sector_1:
# one row per Sector with the mean of 'Change in Market Cap'.
tmp_df = df_concat.loc[:, ['Change in Market Cap', 'Sector']].copy()
pivot_table = pd.pivot_table(
    tmp_df,
    index=['Sector'],
    values=['Change in Market Cap'],
    aggfunc={'Change in Market Cap': ['mean']},
)
# Replace each pivot column header with its flattened form via mitosheet's
# flatten_column_header (the sector chart reads 'Change in Market Cap mean').
pivot_table.columns = [flatten_column_header(header) for header in pivot_table.columns]
# Turn the Sector index back into a regular column.
average_change_in_market_cap_by_sector_1 = pivot_table.reset_index()

# Styler that renders 'Change in Market Cap' as a percentage with two decimals
# and thousands separators. Display df_concat_styler to see the formatted frame;
# df_concat itself is left unchanged.
df_concat_styler = df_concat.style.format(
    "{:,.2%}",
    subset=['Change in Market Cap'],
)


In [32]:
import plotly.express as px
# Bar chart of the change in Market Cap for every symbol left in df_concat.
# Plotly Express reference: https://plotly.com/python/plotly-express/
fig = px.bar(df_concat, x='Symbol', y='Change in Market Cap')
fig.update_layout(
    title='Symbol, Change in Market Cap bar chart',
    # Grid lines plus a thin range slider under the x axis.
    xaxis=dict(
        showgrid=True,
        rangeslider=dict(visible=True, thickness=0.05),
    ),
    yaxis=dict(showgrid=True),
    legend=dict(orientation='v'),
    barmode='group',
    paper_bgcolor='#FFFFFF',
)
fig.show(renderer="iframe")

In [33]:
import plotly.express as px
# Bar chart of the mean change in Market Cap per sector.
# Plotly Express reference: https://plotly.com/python/plotly-express/
fig = px.bar(
    average_change_in_market_cap_by_sector_1,
    x='Sector',
    y='Change in Market Cap mean',
)
fig.update_layout(
    title='Sector, Change in Market Cap mean bar chart',
    # Grid lines plus a thin range slider under the x axis.
    xaxis=dict(
        showgrid=True,
        rangeslider=dict(visible=True, thickness=0.05),
    ),
    yaxis=dict(showgrid=True),
    legend=dict(orientation='v'),
    barmode='group',
    paper_bgcolor='#FFFFFF',
)
fig.show(renderer="iframe")