In [721]:
import csv

import numpy as np
import pandas as pd
from collections import defaultdict
from itertools import islice

from debugpy.launcher import output

## Advanced Practice Problems Using `defaultdict()`

### **Problem 1: Flash Test Failure Rate per Shift**

**Goal:**
Calculate the number of flash test failures and total tests per shift. Then compute and display the **failure rate (%)**.

**Expected Output (example):**

```
Shift 1 ➜ 3 failures / 150 tests ➜ 2.00%
Shift 2 ➜ 5 failures / 140 tests ➜ 3.57%
Shift 3 ➜ 1 failures / 135 tests ➜ 0.74%
```





In [722]:
# Per-shift tally of flash-test failures and total tests. The first lookup of
# a shift creates its entry with both counters at zero.
flash_test_failure_per_shift = defaultdict(
    lambda: dict.fromkeys(('Flash Failure', 'Total Test'), 0)
)

In [723]:
# Tally flash-test results per shift from the assembly dataset.
# The tally is emptied first so that re-running this cell starts from zero
# instead of adding the same rows on top of the previous run's counts.
flash_test_failure_per_shift.clear()

with open('solar_panel_assembly_dataset.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        shift = row['Shift']
        shift_tally = flash_test_failure_per_shift[shift]

        # Every row counts as one test for its shift.
        shift_tally['Total Test'] += 1

        # Compare case-insensitively and ignore surrounding whitespace.
        if row['Flash Test Result'].strip().lower() == 'fail':
            shift_tally['Flash Failure'] += 1

In [724]:
# Shift name -> summary dict (failures, total tests, failure rate).
results = dict()

In [725]:
# Derive each shift's failure percentage from its tallies and store the
# summary in `results`; a shift with zero tests gets a rate of 0.
results.update({
    shift_name: {
        'Failures': tally['Flash Failure'],
        'Total Tests': tally['Total Test'],
        'Failure Rate (%)': (
            (tally['Flash Failure'] / tally['Total Test']) * 100
            if tally['Total Test'] else 0
        ),
    }
    for shift_name, tally in flash_test_failure_per_shift.items()
})

In [726]:
# Print one line per shift: failure count, total tests, and the failure rate
# formatted to two decimals.
# The f-string is delimited with double quotes and the dictionary keys with
# single quotes: reusing the same quote character inside the replacement
# fields only parses on Python 3.12+ and is a SyntaxError on older versions.
for shift, summary in results.items():
    print(
        f"{shift} -> {summary['Failures']} failures / "
        f"{summary['Total Tests']} tests --> "
        f"{summary['Failure Rate (%)']:.2f}%"
    )

Shift 1 -> 36 failures / 90 tests --> 40.00%
Shift 2 -> 43 failures / 90 tests --> 47.78%


### **Problem 2: Average Power Output by Assembly Line**

**Goal:**
Group panels by `'Assembly Line'` and compute the **average `'Power Output (W)'`** for each line.

**Expected Output (example):**

```
Line A ➜ 318.7 W
Line B ➜ 321.5 W
Line C ➜ 319.0 W
```

In [727]:
# Maps each 'Assembly Line' value to the list of 'Power Output (W)' readings
# collected for it; a line that has not been seen yet starts with an empty list.
output_by_assembly_line = defaultdict(list)

In [728]:
# Collect every power reading under its assembly line.
# The mapping is emptied first so that re-running this cell does not append
# the same readings to the lists a second time.
output_by_assembly_line.clear()

with open('solar_panel_assembly_dataset.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        assembly_line = row['Assembly Line']
        # csv yields strings; convert before storing so the values can be averaged.
        power_output = float(row['Power Output (W)'])
        output_by_assembly_line[assembly_line].append(power_output)
      

In [729]:
# Mean power output per assembly line, rounded to two decimals.
average_power_output = dict(
    (line_name, round(sum(readings) / len(readings), 2))
    for line_name, readings in output_by_assembly_line.items()
)

In [730]:
# Bare expression: shown as this cell's output (assembly line -> rounded mean).
average_power_output

{'Line 3': 326.3, 'Line 1': 333.33, 'Line 2': 323.58}

### **Problem 3: Efficiency Classification Count per Shift**

**Goal:**
Classify each panel by its `'Efficiency (%)'` as "Excellent" (≥ 20%), "Good" (≥ 18% and < 20%), "Average" (≥ 15% and < 18%), or "Poor" (< 15%), and count the panels in each class **per shift**.

**Expected Output (example):**

```
Shift 1:
  Excellent: 42
  Good: 63
  Average: 19
  Poor: 6
...
```


In [731]:
# Nested counter: shift -> efficiency class -> number of panels.
# Both levels are created on first access; inner counts start at 0 (int()).
efficiency_per_shift = defaultdict(lambda: defaultdict(int))

In [732]:
# Count panels per shift in each efficiency class.
# The counter is emptied first so that re-running this cell does not add the
# same rows on top of the previous run's counts.
efficiency_per_shift.clear()

with open('solar_panel_assembly_dataset.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        shift = row['Shift']
        efficiency = float(row['Efficiency (%)'])

        # Thresholds are checked from highest to lowest, so each branch only
        # needs the lower bound of its class.
        if efficiency >= 20:
            grade = 'Excellent'
        elif efficiency >= 18:
            grade = 'Good'
        elif efficiency >= 15:
            grade = 'Average'
        else:
            grade = 'Poor'

        efficiency_per_shift[shift][grade] += 1

In [733]:
# Bare expression: shown as this cell's output. Only classes that were counted
# at least once appear, because inner keys are created on first increment.
efficiency_per_shift

defaultdict(<function __main__.<lambda>()>,
            {'Shift 1': defaultdict(int,
                         {'Average': 37, 'Excellent': 27, 'Good': 26}),
             'Shift 2': defaultdict(int,
                         {'Excellent': 30, 'Good': 24, 'Average': 36})})

### **Problem 4: Most Common Number of Cells per Panel Type**

**Goal:**
Find the **most frequently used `'Number of Cells'`** for each `'Panel Type'`.

**Expected Output (example):**

```
Monocrystalline ➜ 60 cells
Polycrystalline ➜ 72 cells
Thin-Film ➜ 120 cells
```


In [734]:
# Nested counter: panel type -> 'Number of Cells' value -> occurrences.
# Both levels are created on first access; inner counts start at 0 (int()).
common_cells_per_panel = defaultdict(lambda: defaultdict(int))

In [735]:
# Count how often each 'Number of Cells' value occurs for every panel type.
# The counter is emptied first so that re-running this cell does not add the
# same rows on top of the previous run's counts.
common_cells_per_panel.clear()

with open('solar_panel_assembly_dataset.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        panel_type = row['Panel Type']
        # Kept as the raw CSV string; it is only used as a dictionary key.
        number_of_cells = row['Number of Cells']
        common_cells_per_panel[panel_type][number_of_cells] += 1

In [736]:
# For each panel type, print the cell count(s) with the highest frequency.
# When several cell counts tie for the maximum, one line is printed per tie.
for panel_type, cell_tally in common_cells_per_panel.items():
    top_count = max(cell_tally.values())
    most_common = [cells for cells, seen in cell_tally.items() if seen == top_count]
    for cells in most_common:
        print(f'{panel_type} --> {cells} cells')

Thin-Film --> 60 cells
Polycrystalline --> 72 cells
Monocrystalline --> 72 cells


### **Problem 5: Junction Box Attachment Success Rate by Operator**

**Goal:**
For each operator, count how many times `'Junction Box Attached'` is `"Yes"` vs `"No"`, and compute the **success rate** (the percentage of that operator's panels marked `"Yes"`).

**Expected Output (example):**

```
Operator 101 ➜ 48/50 ➜ 96.0%
Operator 202 ➜ 51/53 ➜ 96.2%
...
```

In [737]:
# Per-operator tally: 'JB Attached' and 'Total Attached' counters, both
# starting at zero the first time an operator is looked up.
jba_success_rate_per_operator = defaultdict(
    lambda: dict.fromkeys(('JB Attached', 'Total Attached'), 0)
)

In [738]:
# Tally junction-box attachment results per operator.
# The tally is emptied first so that re-running this cell does not add the
# same rows on top of the previous run's counts.
jba_success_rate_per_operator.clear()

with open('solar_panel_assembly_dataset.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        operator = row['Operator ID']
        operator_tally = jba_success_rate_per_operator[operator]

        # Every row counts towards the operator's total.
        operator_tally['Total Attached'] += 1

        # Compare case-insensitively and ignore surrounding whitespace, the
        # same way the flash-test cell compares its result column.
        jb_attached = row['Junction Box Attached'].strip().lower()
        if jb_attached == 'yes':
            operator_tally['JB Attached'] += 1

In [739]:
# Print attached / total and the success percentage for every operator.
# The rate is rounded to two decimals; an operator with no rows gets 0.
for operator, tally in jba_success_rate_per_operator.items():
    attached, handled = tally['JB Attached'], tally['Total Attached']
    success_rate = round(attached / handled * 100, 2) if handled != 0 else 0
    print(f'{operator} --> {attached} / {handled} --> {success_rate}%')

EMP593 --> 1 / 1 --> 100.0%
EMP448 --> 1 / 1 --> 100.0%
EMP993 --> 1 / 1 --> 100.0%
EMP533 --> 0 / 1 --> 0.0%
EMP415 --> 1 / 1 --> 100.0%
EMP358 --> 1 / 1 --> 100.0%
EMP795 --> 0 / 1 --> 0.0%
EMP538 --> 0 / 2 --> 0.0%
EMP138 --> 1 / 1 --> 100.0%
EMP554 --> 0 / 1 --> 0.0%
EMP973 --> 1 / 1 --> 100.0%
EMP727 --> 0 / 2 --> 0.0%
EMP508 --> 0 / 1 --> 0.0%
EMP708 --> 2 / 2 --> 100.0%
EMP476 --> 0 / 1 --> 0.0%
EMP519 --> 1 / 1 --> 100.0%
EMP713 --> 1 / 1 --> 100.0%
EMP532 --> 0 / 1 --> 0.0%
EMP164 --> 1 / 1 --> 100.0%
EMP909 --> 0 / 1 --> 0.0%
EMP305 --> 0 / 1 --> 0.0%
EMP132 --> 1 / 1 --> 100.0%
EMP497 --> 0 / 1 --> 0.0%
EMP402 --> 1 / 1 --> 100.0%
EMP794 --> 0 / 1 --> 0.0%
EMP762 --> 0 / 1 --> 0.0%
EMP813 --> 1 / 1 --> 100.0%
EMP695 --> 1 / 1 --> 100.0%
EMP349 --> 1 / 1 --> 100.0%
EMP705 --> 1 / 1 --> 100.0%
EMP211 --> 1 / 1 --> 100.0%
EMP656 --> 0 / 1 --> 0.0%
EMP528 --> 0 / 1 --> 0.0%
EMP913 --> 0 / 1 --> 0.0%
EMP694 --> 0 / 1 --> 0.0%
EMP582 --> 1 / 1 --> 100.0%
EMP350 --> 1 / 2 --> 50.0%

### **Problem 6: Average Cell Alignment Deviation per Panel Type**

**Goal:**
Compute the **average `'Cell Alignment Deviation (mm)'`** for each `'Panel Type'`.

**Expected Output (example):**

```
Monocrystalline ➜ 0.21 mm
Polycrystalline ➜ 0.28 mm
Thin-Film ➜ 0.19 mm
```

In [740]:
# Per-panel-type running sum of deviations and number of rows seen, both
# starting at zero the first time a panel type is looked up.
cell_alignment_deviation_per_panel_type = defaultdict(
    lambda: dict.fromkeys(('Total Cell Alignment Deviation', 'Number of Tests'), 0)
)

In [741]:
# Accumulate the deviation sum and row count for every panel type.
# The accumulator is emptied first so that re-running this cell does not add
# the same rows on top of the previous run's totals.
cell_alignment_deviation_per_panel_type.clear()

with open('solar_panel_assembly_dataset.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        panel_type = row['Panel Type']
        cell_align_dev = float(row['Cell Alignment Deviation (mm)'])

        panel_stats = cell_alignment_deviation_per_panel_type[panel_type]
        panel_stats['Total Cell Alignment Deviation'] += cell_align_dev
        panel_stats['Number of Tests'] += 1

In [742]:
# Print the mean cell alignment deviation (sum / count) per panel type,
# formatted to two decimals.
for panel_type, totals in cell_alignment_deviation_per_panel_type.items():
    deviation_sum = totals['Total Cell Alignment Deviation']
    row_count = totals['Number of Tests']
    print(f'{panel_type} --> {deviation_sum / row_count:.2f} mm')

Thin-Film --> 0.21 mm
Polycrystalline --> 0.25 mm
Monocrystalline --> 0.25 mm


### **Problem 7: Final Inspection Status Breakdown Per Date**

**Goal:**
For each `'Date'`, count how many panels passed and failed the `'Final Inspection'`.

**Expected Output (example):**

```
2025-04-01 ➜ Pass: 85 | Fail: 5
2025-04-02 ➜ Pass: 79 | Fail: 7
...
```


In [743]:
# Per-date tally of final-inspection outcomes; 'Pass' and 'Fail' both start
# at zero the first time a date is looked up.
final_inspection_status = defaultdict(
    lambda: dict.fromkeys(('Pass', 'Fail'), 0)
)

In [744]:
# Count passed and failed final inspections per date.
# The tally is emptied first so that re-running this cell does not add the
# same rows on top of the previous run's counts.
final_inspection_status.clear()

with open('solar_panel_assembly_dataset.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        date_ = row['Date']
        # Compare case-insensitively and ignore surrounding whitespace so that
        # values such as 'pass' or ' Pass' are not miscounted as failures.
        status = row['Final Inspection'].strip().lower()

        # Anything that is not a pass is counted as a failure.
        outcome = 'Pass' if status == 'pass' else 'Fail'
        final_inspection_status[date_][outcome] += 1
        

In [745]:
# Print the pass and fail counts for every date, in first-seen order.
for day, outcome_counts in final_inspection_status.items():
    print(f"{day} --> Pass: {outcome_counts['Pass']} | Fail: {outcome_counts['Fail']}")

2025-04-01 --> Pass: 4 | Fail: 2
2025-04-02 --> Pass: 3 | Fail: 3
2025-04-03 --> Pass: 3 | Fail: 3
2025-04-04 --> Pass: 4 | Fail: 2
2025-04-05 --> Pass: 6 | Fail: 0
2025-04-06 --> Pass: 6 | Fail: 0
2025-04-07 --> Pass: 6 | Fail: 0
2025-04-08 --> Pass: 5 | Fail: 1
2025-04-09 --> Pass: 5 | Fail: 1
2025-04-10 --> Pass: 6 | Fail: 0
2025-04-11 --> Pass: 4 | Fail: 2
2025-04-12 --> Pass: 6 | Fail: 0
2025-04-13 --> Pass: 6 | Fail: 0
2025-04-14 --> Pass: 6 | Fail: 0
2025-04-15 --> Pass: 6 | Fail: 0
2025-04-16 --> Pass: 3 | Fail: 3
2025-04-17 --> Pass: 6 | Fail: 0
2025-04-18 --> Pass: 4 | Fail: 2
2025-04-19 --> Pass: 4 | Fail: 2
2025-04-20 --> Pass: 5 | Fail: 1
2025-04-21 --> Pass: 5 | Fail: 1
2025-04-22 --> Pass: 3 | Fail: 3
2025-04-23 --> Pass: 4 | Fail: 2
2025-04-24 --> Pass: 4 | Fail: 2
2025-04-25 --> Pass: 5 | Fail: 1
2025-04-26 --> Pass: 6 | Fail: 0
2025-04-27 --> Pass: 4 | Fail: 2
2025-04-28 --> Pass: 5 | Fail: 1
2025-04-29 --> Pass: 4 | Fail: 2
2025-04-30 --> Pass: 5 | Fail: 1


### **Problem 8: Daily Average Glass Thickness by Shift**

**Goal:**
Calculate the average `'Glass Thickness (mm)'` for each combination of **date** and **shift**.

**Expected Output (example):**

```
2025-04-01 (Shift 1): 3.2 mm
2025-04-01 (Shift 2): 3.1 mm
...
```


In [746]:
# date -> shift -> running thickness sum and row count. Every level is
# created on first access, with both counters starting at zero.
daily_avg_glass_thickness_per_shift = defaultdict(
    lambda: defaultdict(
        lambda: dict.fromkeys(('Total Glass Thickness', 'Number of Tests'), 0)
    )
)

In [747]:
# Accumulate the glass-thickness sum and row count per date and shift.
# The accumulator is emptied first so that re-running this cell does not add
# the same rows on top of the previous run's totals.
daily_avg_glass_thickness_per_shift.clear()

with open('solar_panel_assembly_dataset.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        date = row['Date']
        shift = row['Shift']
        glass_thickness = float(row['Glass Thickness (mm)'])

        shift_stats = daily_avg_glass_thickness_per_shift[date][shift]
        shift_stats['Total Glass Thickness'] += glass_thickness
        shift_stats['Number of Tests'] += 1

In [748]:
# Print the mean glass thickness (sum / count) for every date and shift,
# formatted to two decimals.
for date, per_shift in daily_avg_glass_thickness_per_shift.items():
    for shift, totals in per_shift.items():
        mean_thickness = totals['Total Glass Thickness'] / totals['Number of Tests']
        print(f'{date} ({shift}): {mean_thickness:.2f} mm')

2025-04-01 (Shift 1): 3.47 mm
2025-04-01 (Shift 2): 3.67 mm
2025-04-02 (Shift 1): 3.60 mm
2025-04-02 (Shift 2): 3.23 mm
2025-04-03 (Shift 1): 3.40 mm
2025-04-03 (Shift 2): 3.43 mm
2025-04-04 (Shift 1): 3.53 mm
2025-04-04 (Shift 2): 3.63 mm
2025-04-05 (Shift 1): 3.93 mm
2025-04-05 (Shift 2): 3.93 mm
2025-04-06 (Shift 1): 3.50 mm
2025-04-06 (Shift 2): 3.70 mm
2025-04-07 (Shift 1): 3.63 mm
2025-04-07 (Shift 2): 3.47 mm
2025-04-08 (Shift 1): 3.53 mm
2025-04-08 (Shift 2): 3.27 mm
2025-04-09 (Shift 1): 3.50 mm
2025-04-09 (Shift 2): 3.73 mm
2025-04-10 (Shift 1): 3.33 mm
2025-04-10 (Shift 2): 3.53 mm
2025-04-11 (Shift 1): 3.33 mm
2025-04-11 (Shift 2): 3.43 mm
2025-04-12 (Shift 1): 3.40 mm
2025-04-12 (Shift 2): 3.47 mm
2025-04-13 (Shift 1): 3.53 mm
2025-04-13 (Shift 2): 3.67 mm
2025-04-14 (Shift 1): 3.33 mm
2025-04-14 (Shift 2): 3.57 mm
2025-04-15 (Shift 1): 3.60 mm
2025-04-15 (Shift 2): 3.40 mm
2025-04-16 (Shift 1): 3.77 mm
2025-04-16 (Shift 2): 3.83 mm
2025-04-17 (Shift 1): 3.60 mm
2025-04-17

### **Problem 9: Operator with Highest Average Efficiency**

**Goal:**
Identify the operator with the **highest average `'Efficiency (%)'`** (only include those who assembled ≥ 10 panels).

**Expected Output (example):**

```
Operator 205 ➜ 20.8% (12 panels)
```








### **Problem 10: Count of Panels with Insulation Resistance < 50 MΩ per Shift**

**Goal:**
For each shift, count how many panels had `'Insulation Resistance (MΩ)'` less than 50.

**Expected Output (example):**

```
Shift 1 ➜ 4 panels
Shift 2 ➜ 2 panels
Shift 3 ➜ 1 panel
```
