In [16]:
import sys
import win32com.client
import pandas as pd
from bs4 import BeautifulSoup  # For parsing HTML email bodies
from datetime import datetime
import os

# Install required packages if not already installed
def install_packages():
    import subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pandas', 'pywin32', 'beautifulsoup4'])

# Check if required packages are installed, and install them if necessary
try:
    import pandas
    import win32com
    import bs4
except ImportError:
    print("Some required packages are missing. Installing...")
    install_packages()
    # Import again after installation
    import pandas
    import win32com
    import bs4

# Connect to Outlook
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.GetDefaultFolder(6)  # 6 refers to the Inbox

# Initialize a list to store tables from emails
all_tables = []

# Iterate through all emails in the inbox
for email in inbox.Items:
    # Check if the email is from the specified sender
    if email.SenderEmailAddress == 'maxshamiri@gmail.com':
        # Check if the subject contains the desired phrase
        if 'Aloha chouchou' in email.Subject:
            # Parse HTML email body
            soup = BeautifulSoup(email.HTMLBody, 'html.parser')
            # Find all tables in the email body
            tables = soup.find_all('table')
            # Extract data from tables
            for table in tables:
                df = pd.read_html(str(table), header=0)  # Convert table to DataFrame with header
                all_tables.extend(df)

# Concatenate all tables into a single DataFrame
if all_tables:
    big_df = pd.concat(all_tables, ignore_index=True)
    # Print the combined DataFrame without duplicating headers
    print("TABLE OUTPUT")
    print(big_df)
    
    # Create a folder with the current date and time
    now = datetime.now()
    folder_name = now.strftime("%Y-%m-%d_%H-%M-%S")
    parent_folder_path = "C:\\Users\\maxsh\\OneDrive\\Bureau\\Programming\\Excel files"
    folder_path = os.path.join(parent_folder_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Save DataFrame to CSV with indices
    csv_with_indices_path = os.path.join(folder_path, "output_with_indices.csv")
    big_df.to_csv(csv_with_indices_path, index=False)
    print(f"CSV file with indices saved at: {csv_with_indices_path}")

    # Save DataFrame to CSV without indices
    csv_without_indices_path = os.path.join(folder_path, "output_without_indices.csv")
    big_df.to_csv(csv_without_indices_path, index=False)
    print(f"CSV file without indices saved at: {csv_without_indices_path}")

else:
    print("No emails from 'maxshamiri@gmail.com' with subject containing 'Aloha chouchou' found.")


TABLE OUTPUT
     A      B             C
0    1      2             3
1  AYO  FRATE  C LA FOLIEEE
CSV file with indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-09-27\output_with_indices.csv
CSV file without indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-09-27\output_without_indices.csv


In [17]:
import sys
import win32com.client
import pandas as pd
from bs4 import BeautifulSoup  # For parsing HTML email bodies
from datetime import datetime
import os

# Install required packages if not already installed
def install_packages():
    import subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pandas', 'pywin32', 'beautifulsoup4'])

# Check if required packages are installed, and install them if necessary
try:
    import pandas
    import win32com
    import bs4
except ImportError:
    print("Some required packages are missing. Installing...")
    install_packages()
    # Import again after installation
    import pandas
    import win32com
    import bs4

# Connect to Outlook
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.GetDefaultFolder(6)  # 6 refers to the Inbox

# Initialize a list to store tables from emails
all_tables = []

# Iterate through all emails in the inbox
for email in inbox.Items:
    # Check if the email is from the specified sender
    if email.SenderEmailAddress == 'maxshamiri@gmail.com':
        # Check if the subject contains the desired phrase
        if 'Aloha chouchou' in email.Subject:
            # Parse HTML email body
            soup = BeautifulSoup(email.HTMLBody, 'html.parser')
            # Find all tables in the email body
            tables = soup.find_all('table')
            # Extract data from tables
            for table in tables:
                df = pd.read_html(str(table), header=0)  # Convert table to DataFrame with header
                all_tables.extend(df)

# Concatenate all tables into a single DataFrame
if all_tables:
    big_df = pd.concat(all_tables, ignore_index=True)
    # Print the combined DataFrame without duplicating headers
    print("TABLE OUTPUT")
    print(big_df)
    
    # Create a folder with the current date and time
    now = datetime.now()
    folder_name = now.strftime("%Y-%m-%d_%H-%M-%S")
    parent_folder_path = "C:\\Users\\maxsh\\OneDrive\\Bureau\\Programming\\Excel files"
    folder_path = os.path.join(parent_folder_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Save DataFrame to CSV with indices
    csv_with_indices_path = os.path.join(folder_path, "output_with_indices.csv")
    big_df.to_csv(csv_with_indices_path, index=False)
    print(f"CSV file with indices saved at: {csv_with_indices_path}")

    # Save DataFrame to CSV without indices
    csv_without_indices_path = os.path.join(folder_path, "output_without_indices.csv")
    big_df.to_csv(csv_without_indices_path, index=False)
    print(f"CSV file without indices saved at: {csv_without_indices_path}")

    # Save DataFrame to Excel with indices
    excel_with_indices_path = os.path.join(folder_path, "output_with_indices.xlsx")
    big_df.to_excel(excel_with_indices_path, index=False)
    print(f"Excel file with indices saved at: {excel_with_indices_path}")

    # Save DataFrame to Excel without indices
    excel_without_indices_path = os.path.join(folder_path, "output_without_indices.xlsx")
    big_df.to_excel(excel_without_indices_path, index=False)
    print(f"Excel file without indices saved at: {excel_without_indices_path}")

else:
    print("No emails from 'maxshamiri@gmail.com' with subject containing 'Aloha chouchou' found.")


TABLE OUTPUT
     A      B             C
0    1      2             3
1  AYO  FRATE  C LA FOLIEEE
CSV file with indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-12-19\output_with_indices.csv
CSV file without indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-12-19\output_without_indices.csv
Excel file with indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-12-19\output_with_indices.xlsx
Excel file without indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-12-19\output_without_indices.xlsx


In [18]:
import sys
import win32com.client
import pandas as pd
from bs4 import BeautifulSoup  # For parsing HTML email bodies
from datetime import datetime
import os

# Install required packages if not already installed
def install_packages():
    import subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pandas', 'pywin32', 'beautifulsoup4'])

# Check if required packages are installed, and install them if necessary
try:
    import pandas
    import win32com
    import bs4
except ImportError:
    print("Some required packages are missing. Installing...")
    install_packages()
    # Import again after installation
    import pandas
    import win32com
    import bs4

# Connect to Outlook
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.GetDefaultFolder(6)  # 6 refers to the Inbox

# Define the date range
start_date = datetime(2024, 2, 25)
end_date = datetime(2024, 2, 27)

# Initialize a list to store tables from emails
all_tables = []

# Iterate through all emails in the inbox
for email in inbox.Items:
    # Check if the email is from the specified sender
    if email.SenderEmailAddress == 'maxshamiri@gmail.com':
        # Check if the subject contains the desired phrase
        if 'Aloha chouchou' in email.Subject:
            # Check if the email was received within the specified date range
            if start_date <= email.ReceivedTime <= end_date:
                # Parse HTML email body
                soup = BeautifulSoup(email.HTMLBody, 'html.parser')
                # Find all tables in the email body
                tables = soup.find_all('table')
                # Extract data from tables
                for table in tables:
                    df = pd.read_html(str(table), header=0)  # Convert table to DataFrame with header
                    all_tables.extend(df)

# Concatenate all tables into a single DataFrame
if all_tables:
    big_df = pd.concat(all_tables, ignore_index=True)
    # Print the combined DataFrame without duplicating headers
    print("TABLE OUTPUT")
    print(big_df)
    
    # Create a folder with the current date and time
    now = datetime.now()
    folder_name = now.strftime("%Y-%m-%d_%H-%M-%S")
    parent_folder_path = "C:\\Users\\maxsh\\OneDrive\\Bureau\\Programming\\Excel files"
    folder_path = os.path.join(parent_folder_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Save DataFrame to CSV with indices
    csv_with_indices_path = os.path.join(folder_path, "output_with_indices.csv")
    big_df.to_csv(csv_with_indices_path, index=False)
    print(f"CSV file with indices saved at: {csv_with_indices_path}")

    # Save DataFrame to CSV without indices
    csv_without_indices_path = os.path.join(folder_path, "output_without_indices.csv")
    big_df.to_csv(csv_without_indices_path, index=False)
    print(f"CSV file without indices saved at: {csv_without_indices_path}")

    # Save DataFrame to Excel with indices
    excel_with_indices_path = os.path.join(folder_path, "output_with_indices.xlsx")
    big_df.to_excel(excel_with_indices_path, index=False)
    print(f"Excel file with indices saved at: {excel_with_indices_path}")

    # Save DataFrame to Excel without indices
    excel_without_indices_path = os.path.join(folder_path, "output_without_indices.xlsx")
    big_df.to_excel(excel_without_indices_path, index=False)
    print(f"Excel file without indices saved at: {excel_without_indices_path}")

else:
    print("No emails from 'maxshamiri@gmail.com' with subject containing 'Aloha chouchou' found.")


TypeError: can't compare offset-naive and offset-aware datetimes

In [19]:
import sys
import win32com.client
import pandas as pd
from bs4 import BeautifulSoup  # For parsing HTML email bodies
from datetime import datetime, timezone
import os

# Install required packages if not already installed
def install_packages():
    import subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pandas', 'pywin32', 'beautifulsoup4'])

# Check if required packages are installed, and install them if necessary
try:
    import pandas
    import win32com
    import bs4
except ImportError:
    print("Some required packages are missing. Installing...")
    install_packages()
    # Import again after installation
    import pandas
    import win32com
    import bs4

# Connect to Outlook
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.GetDefaultFolder(6)  # 6 refers to the Inbox

# Define the date range
start_date = datetime(2024, 2, 25, tzinfo=timezone.utc)  # Assuming the UTC timezone
end_date = datetime(2024, 2, 27, 23, 59, 59, tzinfo=timezone.utc)  # Assuming the UTC timezone

# Initialize a list to store tables from emails
all_tables = []

# Iterate through all emails in the inbox
for email in inbox.Items:
    # Check if the email is from the specified sender
    if email.SenderEmailAddress == 'maxshamiri@gmail.com':
        # Check if the subject contains the desired phrase
        if 'Aloha chouchou' in email.Subject:
            # Check if the email was received within the specified date range
            if start_date <= email.ReceivedTime <= end_date:
                # Parse HTML email body
                soup = BeautifulSoup(email.HTMLBody, 'html.parser')
                # Find all tables in the email body
                tables = soup.find_all('table')
                # Extract data from tables
                for table in tables:
                    df = pd.read_html(str(table), header=0)  # Convert table to DataFrame with header
                    all_tables.extend(df)

# Concatenate all tables into a single DataFrame
if all_tables:
    big_df = pd.concat(all_tables, ignore_index=True)
    # Print the combined DataFrame without duplicating headers
    print("TABLE OUTPUT")
    print(big_df)
    
    # Create a folder with the current date and time
    now = datetime.now()
    folder_name = now.strftime("%Y-%m-%d_%H-%M-%S")
    parent_folder_path = "C:\\Users\\maxsh\\OneDrive\\Bureau\\Programming\\Excel files"
    folder_path = os.path.join(parent_folder_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Save DataFrame to CSV with indices
    csv_with_indices_path = os.path.join(folder_path, "output_with_indices.csv")
    big_df.to_csv(csv_with_indices_path, index=False)
    print(f"CSV file with indices saved at: {csv_with_indices_path}")

    # Save DataFrame to CSV without indices
    csv_without_indices_path = os.path.join(folder_path, "output_without_indices.csv")
    big_df.to_csv(csv_without_indices_path, index=False)
    print(f"CSV file without indices saved at: {csv_without_indices_path}")

    # Save DataFrame to Excel with indices
    excel_with_indices_path = os.path.join(folder_path, "output_with_indices.xlsx")
    big_df.to_excel(excel_with_indices_path, index=False)
    print(f"Excel file with indices saved at: {excel_with_indices_path}")

    # Save DataFrame to Excel without indices
    excel_without_indices_path = os.path.join(folder_path, "output_without_indices.xlsx")
    big_df.to_excel(excel_without_indices_path, index=False)
    print(f"Excel file without indices saved at: {excel_without_indices_path}")

else:
    print("No emails from 'maxshamiri@gmail.com' with subject containing 'Aloha chouchou' found.")


TABLE OUTPUT
     A      B             C
0    1      2             3
1  AYO  FRATE  C LA FOLIEEE
CSV file with indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-51-42\output_with_indices.csv
CSV file without indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-51-42\output_without_indices.csv
Excel file with indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-51-42\output_with_indices.xlsx
Excel file without indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-51-42\output_without_indices.xlsx


In [20]:
import sys
import win32com.client
import pandas as pd
from bs4 import BeautifulSoup  # For parsing HTML email bodies
from datetime import datetime, timezone
import os

# Install required packages if not already installed
def install_packages():
    import subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pandas', 'pywin32', 'beautifulsoup4'])

# Check if required packages are installed, and install them if necessary
try:
    import pandas
    import win32com
    import bs4
except ImportError:
    print("Some required packages are missing. Installing...")
    install_packages()
    # Import again after installation
    import pandas
    import win32com
    import bs4

# Connect to Outlook
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.GetDefaultFolder(6)  # 6 refers to the Inbox

# Define the date range
start_date = datetime(2024, 2, 25, tzinfo=timezone.utc)  # Assuming the UTC timezone
end_date = datetime.now(timezone.utc)  # Current date and time in UTC

# Initialize a list to store tables from emails
all_tables = []

# Iterate through all emails in the inbox
for email in inbox.Items:
    # Check if the email is from the specified sender
    if email.SenderEmailAddress == 'maxshamiri@gmail.com':
        # Check if the subject contains the desired phrase
        if 'Aloha chouchou' in email.Subject:
            # Check if the email was received within the specified date range
            if start_date <= email.ReceivedTime <= end_date:
                # Parse HTML email body
                soup = BeautifulSoup(email.HTMLBody, 'html.parser')
                # Find all tables in the email body
                tables = soup.find_all('table')
                # Extract data from tables
                for table in tables:
                    df = pd.read_html(str(table), header=0)  # Convert table to DataFrame with header
                    all_tables.extend(df)

# Concatenate all tables into a single DataFrame
if all_tables:
    big_df = pd.concat(all_tables, ignore_index=True)
    # Print the combined DataFrame without duplicating headers
    print("TABLE OUTPUT")
    print(big_df)
    
    # Create a folder with the current date and time
    now = datetime.now()
    folder_name = now.strftime("%Y-%m-%d_%H-%M-%S")
    parent_folder_path = "C:\\Users\\maxsh\\OneDrive\\Bureau\\Programming\\Excel files"
    folder_path = os.path.join(parent_folder_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Save DataFrame to CSV with indices
    csv_with_indices_path = os.path.join(folder_path, "output_with_indices.csv")
    big_df.to_csv(csv_with_indices_path, index=False)
    print(f"CSV file with indices saved at: {csv_with_indices_path}")

    # Save DataFrame to CSV without indices
    csv_without_indices_path = os.path.join(folder_path, "output_without_indices.csv")
    big_df.to_csv(csv_without_indices_path, index=False)
    print(f"CSV file without indices saved at: {csv_without_indices_path}")

    # Save DataFrame to Excel with indices
    excel_with_indices_path = os.path.join(folder_path, "output_with_indices.xlsx")
    big_df.to_excel(excel_with_indices_path, index=False)
    print(f"Excel file with indices saved at: {excel_with_indices_path}")

    # Save DataFrame to Excel without indices
    excel_without_indices_path = os.path.join(folder_path, "output_without_indices.xlsx")
    big_df.to_excel(excel_without_indices_path, index=False)
    print(f"Excel file without indices saved at: {excel_without_indices_path}")

else:
    print("No emails from 'maxshamiri@gmail.com' with subject containing 'Aloha chouchou' found.")


TABLE OUTPUT
     A      B             C
0    1      2             3
1  AYO  FRATE  C LA FOLIEEE
CSV file with indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-54-14\output_with_indices.csv
CSV file without indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-54-14\output_without_indices.csv
Excel file with indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-54-14\output_with_indices.xlsx
Excel file without indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-54-14\output_without_indices.xlsx


In [21]:
import sys
import win32com.client
import pandas as pd
from bs4 import BeautifulSoup  # For parsing HTML email bodies
from datetime import datetime, timezone
import os

# Install required packages if not already installed
def install_packages():
    import subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pandas', 'pywin32', 'beautifulsoup4'])

# Check if required packages are installed, and install them if necessary
try:
    import pandas
    import win32com
    import bs4
except ImportError:
    print("Some required packages are missing. Installing...")
    install_packages()
    # Import again after installation
    import pandas
    import win32com
    import bs4

# Connect to Outlook
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.GetDefaultFolder(6)  # 6 refers to the Inbox

# Define the date range
start_date = datetime(2024, 2, 25, tzinfo=timezone.utc)  # Assuming the UTC timezone
end_date = datetime.now(timezone.utc)  # Current date and time in UTC

# Initialize a list to store tables from emails
all_tables = []

# Iterate through all emails in the inbox
for email in inbox.Items:
    # Check if the email is from the specified sender
    if email.SenderEmailAddress == 'maxshamiri@gmail.com':
        # Check if the subject contains the desired phrase
        if 'Aloha chouchou' in email.Subject:
            # Check if the email was received within the specified date range
            if start_date <= email.ReceivedTime <= end_date:
                # Parse HTML email body
                soup = BeautifulSoup(email.HTMLBody, 'html.parser')
                # Find all tables in the email body
                tables = soup.find_all('table')
                # Extract data from tables
                for table in tables:
                    df = pd.read_html(str(table), header=0)  # Convert table to DataFrame with header
                    all_tables.extend(df)

# Concatenate all tables into a single DataFrame
if all_tables:
    big_df = pd.concat(all_tables, ignore_index=True)
    # Print the combined DataFrame without duplicating headers
    print("TABLE OUTPUT")
    print(big_df)
    
    # Create a folder with the current date and time
    now = datetime.now()
    folder_name = now.strftime("%Y-%m-%d_%H-%M-%S")
    parent_folder_path = "C:\\Users\\maxsh\\OneDrive\\Bureau\\Programming\\Excel files"
    folder_path = os.path.join(parent_folder_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Save DataFrame to CSV with indices
    csv_with_indices_path = os.path.join(folder_path, "output_with_indices.csv")
    big_df.to_csv(csv_with_indices_path, index=False)
    print(f"CSV file with indices saved at: {csv_with_indices_path}")

    # Save DataFrame to CSV without indices
    csv_without_indices_path = os.path.join(folder_path, "output_without_indices.csv")
    big_df.to_csv(csv_without_indices_path, index=False)
    print(f"CSV file without indices saved at: {csv_without_indices_path}")

    # Save DataFrame to Excel with indices
    excel_with_indices_path = os.path.join(folder_path, "output_with_indices.xlsx")
    big_df.to_excel(excel_with_indices_path, index=False)
    print(f"Excel file with indices saved at: {excel_with_indices_path}")

    # Save DataFrame to Excel without indices
    excel_without_indices_path = os.path.join(folder_path, "output_without_indices.xlsx")
    big_df.to_excel(excel_without_indices_path, index=False)
    print(f"Excel file without indices saved at: {excel_without_indices_path}")

else:
    print("No emails from 'maxshamiri@gmail.com' with subject containing 'Aloha chouchou' found.")


TABLE OUTPUT
     A      B             C
0    1      2             3
1  AYO  FRATE  C LA FOLIEEE
CSV file with indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-55-18\output_with_indices.csv
CSV file without indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-55-18\output_without_indices.csv
Excel file with indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-55-18\output_with_indices.xlsx
Excel file without indices saved at: C:\Users\maxsh\OneDrive\Bureau\Programming\Excel files\2024-02-27_21-55-18\output_without_indices.xlsx
