### Load all datasets


In [21]:
from unittest.mock import inplace

import pandas as pd

integrated_og_combined_color_count = pd.read_csv('../outputs/integrated_ocean_plastics.csv')
integrated_og_combined_color_count.describe()

Unnamed: 0,Year,mp/kg dw,log_concentration
count,2597.0,2729.0,2729.0
mean,2018.641894,13735.35,0.852433
std,0.500755,705134.3,0.829072
min,2013.0,0.0,0.0
25%,2018.0,0.7231181,0.236315
50%,2019.0,3.744196,0.676163
75%,2019.0,18.75361,1.295646
max,2019.0,36836030.0,7.566273


In [11]:
xb_color_count = pd.read_csv("../data/Xialiao Beach color count.csv")
lb_color_count = pd.read_csv("../data/Longmen Beach color count.csv")

### Combine colors sets and save as a new CSV file

In [13]:
combined_color_count = pd.concat([xb_color_count, lb_color_count], ignore_index=True)

# Check the shape of the combined dataframe
print(f"Xialiao Beach shape: {xb_color_count.shape}")
print(f"Longmen Beach shape: {lb_color_count.shape}")
print(f"Combined dataframe shape: {combined_color_count.shape}")

# Preview the combined dataframe
combined_color_count.head()

Xialiao Beach shape: (966, 18)
Longmen Beach shape: (1626, 18)
Combined dataframe shape: (2592, 18)


Unnamed: 0,Date_YYYY-MM-DD,Country_Region,Location_name,Location_lat,Location_lon,Transect,Position,Size_min_mm,Size_max_mm,Size_class,no_color,black,grey,red_pink,orange_brown_yellow,green,blue,purple
0,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,1,1,5,microplastics,52.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,2,1,5,microplastics,36.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,3,1,5,microplastics,76.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0
3,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,4,1,5,microplastics,56.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,5,1,5,microplastics,179.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0


In [14]:
# Save the combined dataframe to a new CSV file
combined_color_count.to_csv('../outputs/combined_beach_color_count.csv', index=False)

### Display dataset info


In [16]:
print("Original dataset info:")
print(f"Number of rows: {len(combined_color_count)}")
print(f"Number of columns: {len(combined_color_count.columns)}")
print(f"Columns: {', '.join(combined_color_count.columns)}")
print()

Original dataset info:
Number of rows: 2592
Number of columns: 18
Columns: Date_YYYY-MM-DD, Country_Region, Location_name, Location_lat, Location_lon, Transect, Position, Size_min_mm, Size_max_mm, Size_class, no_color, black, grey, red_pink, orange_brown_yellow, green, blue, purple



### Create a new column for "Dominant Color"

In [17]:
combined_color_count["Dominant Color"] = "unknown"
print("Step 1: Added 'Dominant Color' column with default value 'unknown'")

Step 1: Added 'Dominant Color' column with default value 'unknown'


### Define a function to find the dominant color for each row


In [18]:
def find_dominant_color(row):
    """
    Find the column with the highest numerical value among the color columns.
    If the highest value is 0, keep "unknown".
    If the column is "no_color", return "transparent".
    Otherwise, return the name of the column.
    """
    # List of color columns to check
    color_cols = ['no_color', 'black', 'grey', 'red_pink',
                 'orange_brown_yellow', 'green', 'blue', 'purple']

    # Extract the color values for this row
    color_values = [row[col] for col in color_cols]

    # Find the maximum value and its index
    max_val = max(color_values)
    max_idx = color_values.index(max_val)

    # If the maximum value is 0, keep "unknown"
    if max_val == 0:
        return "unknown"

    # Get the name of the column with the maximum value
    max_col = color_cols[max_idx]

    # Replace "no_color" with "transparent"
    if max_col == "no_color":
        return "transparent"
    else:
        return max_col

### Apply the function to each row and drop the numerical color columns

In [19]:
combined_color_count["Dominant Color"] = combined_color_count.apply(find_dominant_color, axis=1)
print("Step 2: Applied function to determine the dominant color for each row")

# Display information about the dominant color transformation
print("\nDominant color distribution:")
print(combined_color_count["Dominant Color"].value_counts())
print()

# Step 3: Drop the numerical color columns
color_cols = ['no_color', 'black', 'grey', 'red_pink',
             'orange_brown_yellow', 'green', 'blue', 'purple']

combined_color_count = combined_color_count.drop(columns=color_cols)
print("Step 3: Dropped numerical color columns")

Step 2: Applied function to determine the dominant color for each row

Dominant color distribution:
Dominant Color
transparent            1833
unknown                 650
black                    26
green                    22
grey                     18
orange_brown_yellow      15
red_pink                 14
blue                     13
purple                    1
Name: count, dtype: int64

Step 3: Dropped numerical color columns


### Display information about the transformations and save the modified dataset

In [20]:
print("\nFinal dataset info:")
print(f"Number of rows: {len(combined_color_count)}")
print(f"Number of columns: {len(combined_color_count.columns)}")
print(f"Columns: {', '.join(combined_color_count.columns)}")
print()

# Save the modified dataset to a CSV file
output_file = "beach_data_with_dominant_color.csv"
combined_color_count.to_csv(output_file, index=False)
print(f"Step 4: Modified dataset saved to '{output_file}'")

# Print a sample of the transformed data
print("\nSample of transformed data (first 5 rows):")
print(combined_color_count.head(5).to_string())



Final dataset info:
Number of rows: 2592
Number of columns: 11
Columns: Date_YYYY-MM-DD, Country_Region, Location_name, Location_lat, Location_lon, Transect, Position, Size_min_mm, Size_max_mm, Size_class, Dominant Color

Step 4: Modified dataset saved to 'beach_data_with_dominant_color.csv'

Sample of transformed data (first 5 rows):
  Date_YYYY-MM-DD Country_Region  Location_name  Location_lat  Location_lon Transect  Position  Size_min_mm  Size_max_mm     Size_class Dominant Color
0      2018-04-25         Taiwan  Xialiao_Beach      25.21469     121.65406        A         1            1            5  microplastics    transparent
1      2018-04-25         Taiwan  Xialiao_Beach      25.21469     121.65406        A         2            1            5  microplastics    transparent
2      2018-04-25         Taiwan  Xialiao_Beach      25.21469     121.65406        A         3            1            5  microplastics    transparent
3      2018-04-25         Taiwan  Xialiao_Beach      25.21

In [23]:
integrated_og_df = pd.read_csv("../outputs/integrated_ocean_plastics.csv")
integrated_og_df.head()

Unnamed: 0,sample_id,Continent,Country,region,System,Waterbody,Zone Area,Tidal_Zone,Test_Area,Year,...,mp/kg dw,log_concentration,Size_class,Standardized_Shapes,Dominant_Size,dominant_color,source,environment_type,location,concentration_level
0,DOER_97,Africa,Tunisia,Africa,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Jemil,2016.0,...,3682.0,3.566202,microplastics,"fiber, fragment",<2 mm,unknown,DOER Database,Estuary,"Menzel Jemil, Tunisia",High
1,DOER_98,Africa,Tunisia,Africa,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Carrier Bay,2016.0,...,6628.0,3.821448,microplastics,"fiber, fragment",<2 mm,unknown,DOER Database,Estuary,"Carrier Bay, Tunisia",High
2,DOER_96,Africa,Tunisia,Africa,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Abderrahmane,2016.0,...,17921.0,4.253386,microplastics,"fiber, fragment",<2 mm,unknown,DOER Database,Estuary,"Menzel Abderrahmane, Tunisia",Very High
3,DOER_99,Africa,Tunisia,Africa,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Channel,2016.0,...,3191.0,3.504063,microplastics,"fiber, fragment",<2 mm,unknown,DOER Database,Estuary,"Channel, Tunisia",High
4,DOER_1218,Africa,South Africa,Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,...,28400.0,4.453334,unknown,unknown,(N/R),unknown,DOER Database,Estuary,"Port of Durban, South Africa",Very High


### Drop unnecessary columns from integrated dataset

In [24]:
integrated_og_df.drop(columns=["region", "dominant_color"], inplace=True)
integrated_og_df.head()

Unnamed: 0,sample_id,Continent,Country,System,Waterbody,Zone Area,Tidal_Zone,Test_Area,Year,Date,mp/kg dw,log_concentration,Size_class,Standardized_Shapes,Dominant_Size,source,environment_type,location,concentration_level
0,DOER_97,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Jemil,2016.0,,3682.0,3.566202,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Menzel Jemil, Tunisia",High
1,DOER_98,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Carrier Bay,2016.0,,6628.0,3.821448,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Carrier Bay, Tunisia",High
2,DOER_96,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Abderrahmane,2016.0,,17921.0,4.253386,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Menzel Abderrahmane, Tunisia",Very High
3,DOER_99,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Channel,2016.0,,3191.0,3.504063,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Channel, Tunisia",High
4,DOER_1218,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,28400.0,4.453334,unknown,unknown,(N/R),DOER Database,Estuary,"Port of Durban, South Africa",Very High


In [25]:
integrated_og_df.drop(columns=["concentration_level"], inplace=True)
integrated_og_df.head()

Unnamed: 0,sample_id,Continent,Country,System,Waterbody,Zone Area,Tidal_Zone,Test_Area,Year,Date,mp/kg dw,log_concentration,Size_class,Standardized_Shapes,Dominant_Size,source,environment_type,location
0,DOER_97,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Jemil,2016.0,,3682.0,3.566202,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Menzel Jemil, Tunisia"
1,DOER_98,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Carrier Bay,2016.0,,6628.0,3.821448,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Carrier Bay, Tunisia"
2,DOER_96,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Abderrahmane,2016.0,,17921.0,4.253386,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Menzel Abderrahmane, Tunisia"
3,DOER_99,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Channel,2016.0,,3191.0,3.504063,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Channel, Tunisia"
4,DOER_1218,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,28400.0,4.453334,unknown,unknown,(N/R),DOER Database,Estuary,"Port of Durban, South Africa"


In [26]:
integrated_og_df.drop(columns=["log_concentration"], inplace=True)
integrated_og_df.head()

Unnamed: 0,sample_id,Continent,Country,System,Waterbody,Zone Area,Tidal_Zone,Test_Area,Year,Date,mp/kg dw,Size_class,Standardized_Shapes,Dominant_Size,source,environment_type,location
0,DOER_97,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Jemil,2016.0,,3682.0,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Menzel Jemil, Tunisia"
1,DOER_98,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Carrier Bay,2016.0,,6628.0,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Carrier Bay, Tunisia"
2,DOER_96,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Abderrahmane,2016.0,,17921.0,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Menzel Abderrahmane, Tunisia"
3,DOER_99,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Channel,2016.0,,3191.0,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Channel, Tunisia"
4,DOER_1218,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,28400.0,unknown,unknown,(N/R),DOER Database,Estuary,"Port of Durban, South Africa"


In [27]:
combined_color_count.head(20)

Unnamed: 0,Date_YYYY-MM-DD,Country_Region,Location_name,Location_lat,Location_lon,Transect,Position,Size_min_mm,Size_max_mm,Size_class,Dominant Color
0,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,1,1,5,microplastics,transparent
1,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,2,1,5,microplastics,transparent
2,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,3,1,5,microplastics,transparent
3,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,4,1,5,microplastics,transparent
4,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,5,1,5,microplastics,transparent
5,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,6,1,5,microplastics,transparent
6,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,7,1,5,microplastics,transparent
7,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,8,1,5,microplastics,transparent
8,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,9,1,5,microplastics,transparent
9,2018-04-25,Taiwan,Xialiao_Beach,25.21469,121.65406,A,10,1,5,microplastics,transparent


### Add color column back to original set and make a copy of the DF

In [35]:
color_col = combined_color_count["Dominant Color"]
integrated_og_df["Dominant Color"] = color_col
int_with_dc_df = integrated_og_df.copy()
int_with_dc_df.head()

Unnamed: 0,sample_id,Continent,Country,System,Waterbody,Zone Area,Tidal_Zone,Test_Area,Year,Date,mp/kg dw,Size_class,Standardized_Shapes,Dominant_Size,source,environment_type,location,Dominant Color
0,DOER_97,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Jemil,2016.0,,3682.0,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Menzel Jemil, Tunisia",transparent
1,DOER_98,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Carrier Bay,2016.0,,6628.0,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Carrier Bay, Tunisia",transparent
2,DOER_96,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Abderrahmane,2016.0,,17921.0,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Menzel Abderrahmane, Tunisia",transparent
3,DOER_99,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Channel,2016.0,,3191.0,microplastics,"fiber, fragment",<2 mm,DOER Database,Estuary,"Channel, Tunisia",transparent
4,DOER_1218,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,28400.0,unknown,unknown,(N/R),DOER Database,Estuary,"Port of Durban, South Africa",transparent


** (FORGOT TO DROP A COLUMN) **

In [37]:
int_with_dc_df.drop(columns=["Standardized_Shapes"], inplace=True)
int_with_dc_df.head()

KeyError: "['Standardized_Shapes'] not found in axis"

### Define function for standardizing colors

In [39]:
import re


def categorize_size(size_str):
    """Categorize plastic sizes into microplastics (<5mm) or mesoplastics (≥5mm)"""
    if pd.isna(size_str) or size_str == '(N/R)' or size_str == 'unknown':
        return 'unknown'

    size_str = str(size_str).lower()

    # Extract numeric values
    numbers = re.findall(r'\d+\.?\d*', size_str)

    if not numbers:
        return 'unknown'

    # Get the first number found
    size_value = float(numbers[0])

    # Convert to mm if in microns
    if 'micron' in size_str:
        size_value = size_value / 1000

    # Apply categorization rule
    if '<' in size_str:
        # For "less than" notation (e.g., <5mm)
        if size_value < 5:
            return 'microplastics'
        else:
            return 'mesoplastics'
    else:
        # For direct size values
        if size_value < 5:
            return 'microplastics'
        else:
            return 'mesoplastics'

In [40]:
int_with_dc_df['Dominant_Size'].value_counts()

Dominant_Size
<5 mm              1296
5-25 mm            1296
<1 mm                65
(N/R)                51
<0.5 mm               7
<2 mm                 4
<300 micron           4
<500 micron           3
300-5000 micron       1
Name: count, dtype: int64

### Apply categorization function

In [41]:
# Apply the categorization function
int_with_dc_df['Size_Category'] = int_with_dc_df['Dominant_Size'].apply(categorize_size)

# Display the results
print("Distribution of size categories:")
print(int_with_dc_df['Size_Category'].value_counts())

# Show sample mappings
sample = int_with_dc_df[['Dominant_Size', 'Size_Category']].drop_duplicates().sort_values('Dominant_Size')
sample.head(20)

Distribution of size categories:
Size_Category
mesoplastics     2592
microplastics      84
unknown            53
Name: count, dtype: int64


Unnamed: 0,Dominant_Size,Size_Category
4,(N/R),unknown
53,300-5000 micron,microplastics
210,5-25 mm,mesoplastics
127,<0.5 mm,microplastics
54,<1 mm,microplastics
0,<2 mm,microplastics
7,<300 micron,microplastics
135,<5 mm,mesoplastics
5,<500 micron,microplastics
1761,,unknown


In [42]:
int_with_dc_df.head(20)

Unnamed: 0,sample_id,Continent,Country,System,Waterbody,Zone Area,Tidal_Zone,Test_Area,Year,Date,mp/kg dw,Size_class,Dominant_Size,source,environment_type,location,Dominant Color,Size_Category
0,DOER_97,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Jemil,2016.0,,3682.0,microplastics,<2 mm,DOER Database,Estuary,"Menzel Jemil, Tunisia",transparent,microplastics
1,DOER_98,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Carrier Bay,2016.0,,6628.0,microplastics,<2 mm,DOER Database,Estuary,"Carrier Bay, Tunisia",transparent,microplastics
2,DOER_96,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Abderrahmane,2016.0,,17921.0,microplastics,<2 mm,DOER Database,Estuary,"Menzel Abderrahmane, Tunisia",transparent,microplastics
3,DOER_99,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Channel,2016.0,,3191.0,microplastics,<2 mm,DOER Database,Estuary,"Channel, Tunisia",transparent,microplastics
4,DOER_1218,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,28400.0,unknown,(N/R),DOER Database,Estuary,"Port of Durban, South Africa",transparent,unknown
5,DOER_1209,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,8300.0,unknown,<500 micron,DOER Database,Estuary,"Port of Durban, South Africa",transparent,microplastics
6,DOER_1210,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,3100.0,unknown,(N/R),DOER Database,Estuary,"Port of Durban, South Africa",transparent,unknown
7,DOER_1211,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,111933.0,unknown,<300 micron,DOER Database,Estuary,"Port of Durban, South Africa",transparent,microplastics
8,DOER_1212,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,40933.0,unknown,(N/R),DOER Database,Estuary,"Port of Durban, South Africa",transparent,unknown
9,DOER_1213,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,,,11800.0,unknown,<500 micron,DOER Database,Estuary,"Port of Durban, South Africa",transparent,microplastics


In [43]:
int_with_dc_df.drop(columns=["Dominant_Size"], inplace=True)
int_with_dc_df.to_csv("combined_and_integrated_ocean_plastics.csv", index=False)

In [44]:
int_with_dc_df.describe()

Unnamed: 0,Year,mp/kg dw
count,2597.0,2729.0
mean,2018.641894,13735.35
std,0.500755,705134.3
min,2013.0,0.0
25%,2018.0,0.7231181
50%,2019.0,3.744196
75%,2019.0,18.75361
max,2019.0,36836030.0


In [49]:
# DROP DUPLICATe rows
int_with_dc_df.drop_duplicates(inplace=True)

In [50]:
# convert "Year" and "Date" columns to datetime
int_with_dc_df['Year'] = pd.to_datetime(int_with_dc_df['Year'], format='%Y')
int_with_dc_df['Date'] = pd.to_datetime(int_with_dc_df['Date'], format='%Y-%m-%d')
int_with_dc_df.head()

Unnamed: 0,sample_id,Continent,Country,System,Waterbody,Zone Area,Tidal_Zone,Test_Area,Year,Date,mp/kg dw,Size_class,source,environment_type,location,Dominant Color,Size_Category
0,DOER_97,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Jemil,2016-01-01,NaT,3682.0,microplastics,DOER Database,Estuary,"Menzel Jemil, Tunisia",transparent,microplastics
1,DOER_98,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Carrier Bay,2016-01-01,NaT,6628.0,microplastics,DOER Database,Estuary,"Carrier Bay, Tunisia",transparent,microplastics
2,DOER_96,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Menzel Abderrahmane,2016-01-01,NaT,17921.0,microplastics,DOER Database,Estuary,"Menzel Abderrahmane, Tunisia",transparent,microplastics
3,DOER_99,Africa,Tunisia,Estuarine,Lagoon of Bizerte,Coastal,Subtidal,Channel,2016-01-01,NaT,3191.0,microplastics,DOER Database,Estuary,"Channel, Tunisia",transparent,microplastics
4,DOER_1218,Africa,South Africa,Estuarine,Durban Bay,Coastal,Harbor/Port,Port of Durban,NaT,NaT,28400.0,unknown,DOER Database,Estuary,"Port of Durban, South Africa",transparent,unknown


In [51]:
#save final df
int_with_dc_df.to_csv("combined_and_integrated_ocean_plastics.csv", index=False)