In [None]:
# Prints a short greeting; a handy first cell to run.
print("Hello, World!")

In [None]:
import ipywidgets as widgets
from IPython.display import display

def create_checklist_with_progress(title, items):
    """Show an interactive checklist with a live progress indicator.

    Prints ``title``, then displays one row per entry of ``items`` (a checkbox
    next to the entry's text), followed by a progress bar and a
    "Progress: done / total" label that are refreshed whenever a box is toggled.

    Args:
        title: Heading printed above the checklist.
        items: Sequence of strings, one per checklist entry. Each string is
            rendered as HTML, with newline characters turned into ``<br>``.

    Returns:
        None. The widgets are shown via ``display`` as a side effect.
    """
    print(f"\n{title}\n")

    status_label = widgets.HTML(value=f"<b>Progress: 0 / {len(items)}</b>")
    meter = widgets.IntProgress(value=0, min=0, max=len(items), bar_style='info')
    tick_boxes = []

    def refresh(_change):
        # Recount every checkbox on each toggle instead of tracking deltas.
        total = len(items)
        done = sum(1 for box in tick_boxes if box.value)
        meter.value = done
        if done != total:
            status_label.value = f"<b>Progress: {done} / {total}</b>"
            meter.bar_style = 'info'
            return
        # Every box is ticked: switch to the "finished" look.
        status_label.value = f"<b>Progress: {done} / {total} ✅ Complete!</b>"
        meter.bar_style = 'success'

    for entry in items:
        box = widgets.Checkbox(value=False, indent=False, layout=widgets.Layout(width='20px'))
        box.observe(refresh, names='value')
        tick_boxes.append(box)
        text = widgets.HTML(entry.replace('\n', '<br>'))
        display(widgets.HBox([box, text]))

    display(widgets.HBox([meter, status_label]))

# Checklist entries for Tier 1. A "\n" inside an entry is turned into a line
# break (<br>) by create_checklist_with_progress when the entry is rendered.
tier1 = [
    "Run a code cell (click the play button or press Shift+Enter)",
    "Add a new code cell (+ Code button or Ctrl/Cmd+M then B)\nThen write a Python print statement",
    "Add a new text/markdown cell (+ Text button or Ctrl/Cmd+M then M).\nAdd a section header and 3 bullet points",
    "Update a markdown cell (double-click to edit, Shift+Enter to render)",
    "Delete a cell (click the trash icon or Ctrl/Cmd+M then D)",
    "Move a cell up or down (use the up/down arrows in cell menu)",
    "Undo a deleted cell (Edit → Undo delete cells)",
    "Clear all outputs (Edit → Clear all outputs)",
    "Restart the runtime (Runtime → Restart session)",
]

# Render the Tier 1 checklist together with its progress bar.
create_checklist_with_progress("Tier 1: The Basics", tier1)


## Clear all outputs

"Outputs" are the results that appear below each code cell after you run it—things like printed text, tables, charts, error messages, or maps.

"Clear all outputs" removes all of those results from the notebook while keeping your code intact. You'd do this when:

- **Sharing or submitting the notebook** — to make it cleaner or smaller in file size (images and charts can bloat the file)
- **Starting fresh** — you want to re-run everything from scratch and see the outputs generate in order
- **Hiding old results** — your outputs are stale or confusing because you've changed the code but haven't re-run it yet
- **Troubleshooting** — to confirm which cells you've actually run in your current session

It's a cosmetic/organizational action—it doesn't affect your variables in memory or reset the runtime. The runtime still "remembers" everything your code computed; you just can't see the printed results anymore until you run the cells again.

## Restart Session

In Colab (and Jupyter notebooks), the "runtime" is the Python process running in the background that executes your code and holds all your variables, imported libraries, and data in memory.

"Restart the session" means killing that process and starting a fresh one. You'd do this when:

- **Something gets stuck** — a cell is running forever or the notebook becomes unresponsive
- **You want a clean slate** — to make sure your notebook runs correctly from top to bottom without depending on variables you created out of order
- **After installing a package** — some packages need a restart to load properly
- **Memory issues** — if you've loaded huge datasets and want to free up RAM

After restarting, all your variables are gone and you need to re-run your cells from the beginning. Your code stays intact; only the "state" (what's in memory) is cleared.


In [None]:
# Checklist entries for Tier 2 (saving, downloading and uploading files)
tier2 = [
    "Save a copy to your Google Drive",
    "Download your notebook (File → Download → Download .ipynb)",
    "Upload a file from your computer",
]

create_checklist_with_progress("Tier 2: File Management", tier2)


In [None]:
# Checklist entries for Tier 3; "\n" starts a second line inside an entry
tier3 = [
    "Install a package with !pip install\n(See the next two cells)",
    "Use tab-completion\n(Read the cell after that, and make a new cell to try it)",
    "Load a CSV from a URL",
    "Create a simple chart",
    "Get help on a function",
    "View keyboard shortcuts",
    "Mount a Google Drive",
]

create_checklist_with_progress("Tier 3: Developer Skills", tier3)

### TODO: The next cell will give an error. (Figure out how to fix it.)

In [None]:
# Exercise cell: as the TODO heading above says, this cell is expected to
# raise an error the first time you run it. Work out why, then fix it.
# !pip install cowsay
import cowsay

cowsay.cow("I installed a package!")


### NOTE: pandas is already installed in Google Colab, so we don't need to `pip install` it.

In [None]:
import pandas as pd

# Build a tiny three-row table to experiment with (one list per column)
df = pd.DataFrame(
    dict(
        name=['Alice', 'Bob', 'Charlie'],
        age=[25, 30, 35],
        city=['Baltimore', 'Boston', 'Denver'],
    )
)

# A bare expression on the last line makes the notebook render the table
df

**Code Completion** - Create a new Code cell below this one. Type your dataframe name with a dot afterward, in this case, `df.` and press **Tab**. You'll see a dropdown of all available methods like `head`, `tail`, `describe`, `columns`, etc.

Try these:
```python
df.he  # press Tab → autocompletes to df.head
df.des  # press Tab → autocompletes to df.describe
df.col  # press Tab → autocompletes to df.columns
```

You can also use tab completion on column names when selecting a column. In this case, pressing **Tab** autocompletes to `df['name']`:
```python
df['na  
```

And on any object:
```python
import matplotlib.pyplot as plt
plt.  # press Tab to see all plotting functions
```

### TODO: First, create a DataFrame from the SeedMoney data. Then practice **code completion**.

In [None]:
import pandas as pd

SEEDMONEY_CSV_URL = 'https://raw.githubusercontent.com/mroswell/ai-coding-essentials/refs/heads/main/data/seedmoney.csv'

# Read the SeedMoney dataset directly from its URL.
# Alternative kept for reference — read a local copy of the file instead:
# df = pd.read_csv('seedmoney2015-2019.csv')
df = pd.read_csv(SEEDMONEY_CSV_URL)

# Preview the first rows as a rendered table
display(df.head())

In [None]:
# Print every column header of the DataFrame as a bulleted list
print("Fields (columns) in the CSV file:")
for column_name in df.columns:
    print("- {}".format(column_name))

```
* . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~
```

Things to Try:
 - View a vertical list of the first record
 - Generate a bar chart of the garden categories
 - Make the same bar chart horizontal
 - Use Mergely.com to compare the two chart scripts
 - What were the total crowd funds (minus the fees)?
   *[Watch out, this is unexpectedly tricky.]*
 - Save a CSV of just the Prison Gardens
```
* . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~
```

### TODO:  View a vertical list of the first record


### TODO:
- Generate a bar chart of the garden categories
- Make the same bar chart horizontal
- Use Mergely.com to compare the two chart scripts

### TODO: Determine the total Crowd funds (minus fees)
*Watch out, this is unexpectedly tricky*

### TODO: Save a CSV of just the Prison Gardens


---
**NOTE:** Do all of your work above this line.

In [105]:
# Spoiler barrier between the exercises above and the worked solutions below.
# Uses the `cowsay` module, so the earlier cowsay cell must have run successfully.
# NOTE(review): the runs of extra spaces inside the message appear to pad each
# phrase so the speech bubble wraps as shown in the output — confirm before
# editing the text.
cowsay.trex(" Do not pass 'Trex the Tyrannosaurus' until you   have figured out 'Things to Try' on your own!    Select the Things to Try cell, add code cells,   and troubleshoot anything that doesn't work." )

  _________________________________________________
 /                                                 \
| Do not pass 'Trex the Tyrannosaurus' until you    |
| have figured out 'Things to Try' on your own!     |
| Select the Things to Try cell, add code cells,    |
| and troubleshoot anything that doesn't work.      |
 \                                                 /
                                                        \
                                                         \
                                                          \
                                                           \
                                                              .-=-==--==--.
                                                        ..-=="  ,'o`)      `.
                                                      ,'         `"'         \
                                                     :  (                     `.__...._
                                                     |             

## View a vertical list of the first record


In [None]:
# Row 0 comes back as a Series, which prints one field per line
print("First record:")
first_record = df.iloc[0]
print(first_record)


### Counts by Category

In [None]:
# Count how many rows carry each distinct value of the 'Category' column.
# NOTE(review): the variable is named "project_region_counts" but it holds
# Category counts (later cells call the same thing `category_counts`) —
# consider renaming once you have confirmed nothing else uses this name.
project_region_counts = df['Category'].value_counts()
display(project_region_counts)

### Generate a bar chart of the count by Category

In [None]:
# Vertical bar chart: one bar per Category, bar height = number of records.
import matplotlib.pyplot as plt

category_counts = df['Category'].value_counts()

plt.figure(figsize=(12, 6))
category_counts.plot(kind='bar')
plt.title('Count of Records by Category')
plt.xlabel('Category')
plt.ylabel('Count')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

### Make the same bar chart horizontal

In [None]:
# Horizontal bar chart: one bar per Category, bar length = number of records.
category_counts = df['Category'].value_counts()

plt.figure(figsize=(10, 8))
category_counts.plot(kind='barh')
plt.title('Count of Records by Category')
plt.xlabel('Count')
plt.ylabel('Category')
plt.tight_layout()
plt.show()

```
* . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~
```
**Extra Credit:** Visit Mergely, click on Diff, and compare the code from the two chart versions.
```
* . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~ * . ~
```

### Total the Crowdfunds minus fees

In [None]:
# Clean the column (remove $ and commas, convert to number)
fee_column = 'Crowdfunds minus fee'

# Remove "$" and "," characters from the values, then convert them to floats
without_symbols = df[fee_column].replace(r'[\$,]', '', regex=True)
df[fee_column] = without_symbols.astype(float)

# Add up the cleaned column and print it with thousands separators and 2 decimals
total = df[fee_column].sum()
print(f"Total: ${total:,.2f}")

In [None]:
# Count records per 'Year', then order the result by year (the index)
# instead of by frequency.
year_counts = df['Year'].value_counts().sort_index()
display(year_counts)

In [None]:
# Keep only the rows whose Category is exactly 'prison gardens'
is_prison_garden = df['Category'].eq('prison gardens')
prison_gardens_df = df.loc[is_prison_garden]

prison_gardens_df

In [None]:
from google.colab import files

output_csv = 'prison_gardens.csv'

# Write the filtered rows to disk without the index column
prison_gardens_df.to_csv(output_csv, index=False)

# Show which columns ended up in the file
print(prison_gardens_df.columns.tolist())

# Hand the saved file to the browser as a download
files.download(output_csv)

In [None]:
# Print the value counts of both beneficiary columns, primary first
for beneficiary_column in ('Project Primary Beneficiaries', 'Project Secondary Beneficiaries'):
    print(df[beneficiary_column].value_counts())

### Treemap Chart

In [None]:
!pip install squarify
import matplotlib.pyplot as plt
import squarify # Used for treemap visualizations

In [None]:
# Tally how many projects list each primary beneficiary
primary_beneficiary_counts = df['Project Primary Beneficiaries'].value_counts()

# Keep only beneficiaries that account for at least 1% of all records,
# so the treemap is not crowded with tiny tiles
threshold = 0.01 * len(df)
primary_beneficiary_counts = primary_beneficiary_counts.loc[primary_beneficiary_counts.ge(threshold)]

# If more than 20 remain (an arbitrary readability limit), keep the 19 largest
# and fold everything else into a single 'Others' entry
remaining = len(primary_beneficiary_counts)
if remaining > 20:
    top_beneficiaries = primary_beneficiary_counts.nlargest(19)
    other_count = primary_beneficiary_counts.nsmallest(remaining - 19).sum()
    others_entry = pd.Series({'Others': other_count})
    primary_beneficiary_counts = pd.concat([top_beneficiaries, others_entry])

plt.figure(figsize=(15, 10))
squarify.plot(
    sizes=primary_beneficiary_counts.values,
    label=primary_beneficiary_counts.index,
    color=plt.cm.Paired.colors,  # colours taken from the 'Paired' colormap
    alpha=.8,
)
plt.title('Treemap of Project Primary Beneficiaries (Thresholded)', fontsize=16)
plt.axis('off')
plt.show()

In [None]:
!pip install plotly

import plotly.express as px

# Count the projects for every (Category, Primary Beneficiary) pair.
# The summary gets its own name on purpose: assigning it back to `df` would
# replace the full dataset with this three-column table, so earlier cells would
# fail when re-run and a second run of this cell would report every count as 1.
category_beneficiary_counts = (
    df.groupby(['Category', 'Project Primary Beneficiaries'])
      .size()
      .reset_index(name='Count')
)

# Create hierarchical treemap: categories on the outer level, beneficiaries nested inside
fig = px.treemap(category_beneficiary_counts,
                 path=['Category', 'Project Primary Beneficiaries'],  # Hierarchy levels
                 values='Count',
                 title='Projects by Category and Primary Beneficiary')

fig.update_layout(width=1000, height=700)
fig.show()