In [None]:
%load_ext autoreload
%autoreload 2
# default_exp importers.whatsapp

In [None]:
from hashlib import sha256
from integrators.data.schema import *
from integrators.imports import *
import time

In [None]:
# Base URL of the Matrix homeserver that MatrixClient sends its requests to.
DEFAULT_MATRIX_ADDRESS = "http://localhost:8008"
# Server name: used as the domain part of user IDs and in the media download path.
HOSTNAME = "synapse"
# Local part of the Matrix account the importer acts as.
USERNAME = "foo"
# Prefix of the Matrix user IDs built for WhatsApp contacts (see create_account).
PREFIX_SERVICE = "@whatsapp_"
# Full Matrix user ID of the importing account, in the form @user:host.
MATRIX_USERNAME = f"@{USERNAME}:{HOSTNAME}"
# Access token sent as the `access_token` query parameter on authenticated calls.
# NOTE(review): a credential is hard-coded in source here — presumably a local
# development token; confirm, and load it from configuration before sharing.
MATRIX_TOKEN = "MDAxNWxvY2F0aW9uIHN5bmFwc2UKMDAxM2lkZW50aWZpZXIga2V5CjAwMTBjaWQgZ2VuID0gMQowMDFmY2lkIHVzZXJfaWQgPSBAZm9vOnN5bmFwc2UKMDAxNmNpZCB0eXBlID0gYWNjZXNzCjAwMjFjaWQgbm9uY2UgPSAmQHZkbkV2aEdGO0Jsb1NzCjAwMmZzaWduYXR1cmUgAzxgSUYL8xLSwUpbPa3-bHpCD8GnI5mkAVzbOOJufjQK"
# User ID of the bridge bot; used to recognise the two-member room shared with it.
BOT_NAME = f"@whatsappbot:{HOSTNAME}"

class MatrixClient:
    """Small wrapper around the Matrix client-server HTTP API (r0 endpoints).

    Every public method returns the requested data on success and ``None`` on
    failure (non-200 status, transport error, or an undecodable JSON body).
    Failures are printed, not raised. A 200 response that lacks the expected
    top-level key still raises ``KeyError``.
    """

    # Seconds to wait on the server. Without a timeout ``requests`` may block
    # indefinitely on an unresponsive homeserver. Override on the class or an
    # instance if a different limit is needed.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.url = DEFAULT_MATRIX_ADDRESS
        self.username = MATRIX_USERNAME
        self.token = MATRIX_TOKEN

    def _request(self, method, path, params=None, body=None, authenticated=True):
        """Send one HTTP request; return the response, or ``None`` on failure.

        ``params`` are sent as query parameters (URL-encoded by ``requests``);
        the access token is appended to them when ``authenticated`` is true.
        ``body``, if given, is sent as the JSON request body.
        """
        query = dict(params or {})
        if authenticated:
            query["access_token"] = self.token
        try:
            response = requests.request(
                method,
                f"{self.url}{path}",
                params=query,
                json=body,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.exceptions.RequestException as e:
            print(e)
            return None
        if response.status_code != 200:
            print(response, response.content)
            return None
        return response

    def _request_json(self, method, path, key=None, **kwargs):
        """Return the decoded JSON body (or ``body[key]``), ``None`` on failure."""
        response = self._request(method, path, **kwargs)
        if response is None:
            return None
        try:
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # Depending on the requests version a bad body surfaces as either
            # a RequestException subclass or a plain ValueError.
            print(e)
            return None
        return payload if key is None else payload[key]

    def get_joined_rooms(self):
        """Return the ``joined_rooms`` list of the authenticated user."""
        return self._request_json(
            "GET", "/_matrix/client/r0/joined_rooms", key="joined_rooms"
        )

    def get_joined_members(self, room_id):
        """Return the ``joined`` entry of the room's joined-members response."""
        return self._request_json(
            "GET", f"/_matrix/client/r0/rooms/{room_id}/joined_members", key="joined"
        )

    def send_messages(self, room_id, body):
        """Post ``body`` as an ``m.room.message`` event; return its event ID."""
        return self._request_json(
            "POST",
            f"/_matrix/client/r0/rooms/{room_id}/send/m.room.message",
            key="event_id",
            body=body,
        )

    def get_event_context(self, room_id, event_id):
        """Return ``events_after`` for ``event_id``, requested with ``limit=1``."""
        return self._request_json(
            "GET",
            f"/_matrix/client/r0/rooms/{room_id}/context/{event_id}",
            key="events_after",
            params={"limit": 1},
        )

    def sync_events(self, next_batch):
        """Return the full ``/sync`` response since the ``next_batch`` token.

        A ``None`` token is omitted from the query string by ``requests``.
        """
        return self._request_json(
            "GET", "/_matrix/client/r0/sync", params={"since": next_batch}
        )

    def get_profile(self, user_id):
        """Return the profile JSON of ``user_id`` (sent without the access token)."""
        return self._request_json(
            "GET", f"/_matrix/client/r0/profile/{user_id}", authenticated=False
        )

    def get_room_state(self, room_id):
        """Return the decoded JSON of the room's ``/state`` endpoint."""
        return self._request_json("GET", f"/_matrix/client/r0/rooms/{room_id}/state")

    def download_file(self, uri):
        """Return the raw bytes of media ``uri`` stored under ``HOSTNAME``.

        The request is sent without the access token.
        """
        response = self._request(
            "GET",
            f"/_matrix/media/r0/download/{HOSTNAME}/{uri}",
            authenticated=False,
        )
        return None if response is None else response.content
    

In [None]:
class WhatsAppImporter:
    """Turns data from a WhatsApp-bridged Matrix server into schema items.

    ``set_matrix_client`` must be called before any method that contacts the
    server; until then ``matrix_client`` is ``None``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.matrix_client = None

    def set_matrix_client(self):
        """Attach a ``MatrixClient`` built from the module-level defaults."""
        self.matrix_client = MatrixClient()

    def get_joined_rooms(self):
        """Return the rooms joined by the configured user (falsy on failure)."""
        return self.matrix_client.get_joined_rooms()

    def get_receivers(self, room):
        """Return the joined members of ``room`` other than the importing user.

        Returns an empty list when the member lookup fails.
        """
        joined_members = self.matrix_client.get_joined_members(room)
        if not joined_members:
            return []
        # Filtering (rather than list.remove) also copes with the importing
        # user being absent from the member list.
        return [member for member in joined_members if member != MATRIX_USERNAME]

    def get_bot_room_id(self, joined_rooms):
        """Return the first room whose only two members include the bridge bot.

        Returns ``None`` when no such room exists or ``joined_rooms`` is falsy.
        Rooms whose member lookup fails are skipped.
        """
        for room in joined_rooms or []:
            joined_members = self.matrix_client.get_joined_members(room)
            if not joined_members:
                continue
            if len(joined_members) == 2 and BOT_NAME in joined_members:
                return room
        return None

    def bot_list_contacts(self, room_id):
        """Send ``list contacts`` to ``room_id``; return the sent event's ID."""
        body = {"msgtype": "m.text", "body": "list contacts"}
        return self.matrix_client.send_messages(room_id, body)

    def get_contacts(self, room_id, event_id):
        """Return the events that follow ``event_id`` in ``room_id``."""
        return self.matrix_client.get_event_context(room_id, event_id)

    def sync_events(self, next_batch):
        """Return the sync response since the ``next_batch`` token."""
        return self.matrix_client.sync_events(next_batch)

    def get_room_events(self, events):
        """Return the timeline events of one joined-room sync entry."""
        return events["timeline"]["events"]

    def get_phone_number(self, contact):
        """Extract the phone number from one line of the bot's contact list.

        Lines starting with ``#`` or ``* /`` and lines without a ``' - '``
        separator yield ``None``. Otherwise the second ``' - '``-separated
        field is returned with its first and last character stripped.
        """
        if contact.startswith(("#", "* /")):
            return None
        parts = contact.split(' - ')
        if len(parts) < 2:
            return None
        return parts[1][1:-1]

    def create_account(self, phone_number):
        """Build an ``Account`` for the bridged user of ``phone_number``.

        Returns ``None`` when ``phone_number`` is ``None`` or the profile
        lookup fails. Profile fields that are missing become ``None``.
        """
        if phone_number is None:
            return None
        user_id = f"{PREFIX_SERVICE}{phone_number}:{HOSTNAME}"
        profile = self.matrix_client.get_profile(user_id)
        if profile is None:
            return None
        return Account(
            externalId=user_id,
            displayName=profile.get("displayname"),
            avatarUrl=profile.get("avatar_url"),
            service="whatsapp",
        )

    def create_message_channel(self, room_id):
        """Build a ``MessageChannel`` for ``room_id`` from its state events.

        Name and topic come from the ``m.room.name`` / ``m.room.topic`` state
        events. They stay ``None`` when the event is absent or the state
        lookup fails.
        """
        room_state = self.matrix_client.get_room_state(room_id)
        room_name = None
        room_topic = None
        for state_event in room_state or []:
            if state_event["type"] == "m.room.name":
                room_name = state_event["content"].get("name")
            elif state_event["type"] == "m.room.topic":
                # Assignment: the previous `==` compared and discarded the topic.
                room_topic = state_event["content"].get("topic")
        return MessageChannel(externalId=room_id, name=room_name, topic=room_topic)

    def create_message(self, event):
        """Build a ``Message`` from an event's ID and content."""
        return Message(
            externalId=event["event_id"],
            importJson=event["content"],
            service="whatsapp",
        )

    def create_media(self, content):
        """Download the media of a message and wrap it in a schema item.

        Returns a ``Photo``, ``Video``, ``Audio`` or ``Document`` linked to a
        ``File`` carrying the SHA-256 of the downloaded bytes. Returns
        ``None`` for any other ``msgtype``.
        """
        # Fourth '/'-separated component of the URL.
        # assumes an mxc://<server>/<media_id> URL — TODO confirm
        uri = content["url"].split('/')[3]
        binaries = self.matrix_client.download_file(uri)
        sha_file = sha256(binaries).hexdigest()
        file = File(externalId=content["body"], sha256=sha_file)

        msgtype = content["msgtype"]
        if msgtype == "m.image":
            media = Photo(externalId=content["url"])
        elif msgtype == "m.video":
            media = Video(externalId=content["url"], duration=content["info"]["duration"])
        elif msgtype == "m.audio":
            media = Audio(externalId=content["url"], duration=content["info"]["duration"])
        elif msgtype == "m.file":
            media = Document(externalId=content["url"], size=content["info"]["size"])
        else:
            return None
        media.add_edge("file", file)
        return media

In [None]:
# Create the importer and attach its Matrix client; set_matrix_client() builds
# the client from the module-level address, user and token constants.
whatsapp_importer = WhatsAppImporter()
whatsapp_importer.set_matrix_client()

In [None]:
# Rooms the configured Matrix user has joined; a falsy value means the request failed.
all_rooms = whatsapp_importer.get_joined_rooms()

In [None]:
# Ask the bridge bot for the contact list: locate the two-member room shared
# with the bot, send it "list contacts", then fetch the event after our message.
room_id = whatsapp_importer.get_bot_room_id(all_rooms)
event_id = whatsapp_importer.bot_list_contacts(room_id)
# Fixed pause before reading the event that follows the request.
# NOTE(review): presumably this waits for the bot's reply, so a reply slower
# than one second would be missed — confirm.
time.sleep(1)
contact_list = whatsapp_importer.get_contacts(room_id, event_id)

In [None]:
# The first returned event's body is split into lines, one contact per line.
# Lines that yield no phone number are dropped before the accounts are created.
contacts = contact_list[0]["content"]["body"].split("\n")
numbers = [
    number
    for number in map(whatsapp_importer.get_phone_number, contacts)
    if number is not None
]
accounts = list(map(whatsapp_importer.create_account, numbers))

In [None]:
# For every joined room, build its MessageChannel and add a "receiver" edge
# for each member other than the importing user.
# NOTE(review): `m` is a plain user-ID string from get_receivers, not an
# Account item — confirm that add_edge accepts this.
# NOTE(review): message_channel is rebound on every iteration and not stored
# anywhere in this cell, so only the last channel stays reachable afterwards.
for r in all_rooms:
    members = whatsapp_importer.get_receivers(r)
    message_channel = whatsapp_importer.create_message_channel(r)
    for m in members:
        message_channel.add_edge("receiver", m)

In [None]:
# Fetch everything since the given sync token and turn each timeline event of
# each joined room into a Message linked to its room and sender.
batch = "s9_7_0_1_1_1"
sync_events = whatsapp_importer.sync_events(batch)
joined_rooms = sync_events["rooms"]["join"]
for r in joined_rooms:
    room_events = whatsapp_importer.get_room_events(joined_rooms[r])
    for e in room_events:
        message = whatsapp_importer.create_message(e)
        message.add_edge("messageChannel", r)
        message.add_edge("sender", e["sender"])
        # TODO: link replies with a "replyTo" edge taken from
        # e["content"]["m.relates_to"]["m.in_reply_to"]["event_id"].
        # Content that carries an "info" entry is handled as media.
        if "info" in e["content"]:
            media = whatsapp_importer.create_media(e["content"])
            if e["content"]["msgtype"] == "m.video":
                message.add_edge("video", media)
            elif e["content"]["msgtype"] == "m.image":
                message.add_edge("photo", media)
            # NOTE(review): a second, unreachable `m.image` branch was removed
            # here. create_media also builds Audio and Document items, which
            # are never attached — presumably that branch was meant for one of
            # them; confirm the schema's edge names before adding it.
`