# LOSSLESS DATA COMPRESSION AND TRANSMISSION

## LOADING DATASET

In [28]:
import sys
# Read the whole dataset into memory as one string; rstrip() removes trailing
# whitespace (such as a final newline) so it does not become part of the message.
with open('new_dataset.txt', 'r') as file:
    data = file.read().rstrip()

In [29]:
# Shallow in-memory size of the str object in bytes (includes the object's own
# overhead), which is not the same thing as the size of the file on disk.
sys.getsizeof(data)

60049

In [30]:
# Check the type of the loaded dataset.
type(data)

str

In [31]:
# Display the loaded dataset.
data

'abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc

## ENCRYPTION

In [32]:
def encryptRailFence(text, key):


    rail = [['\n' for i in range(len(text))]
            for j in range(key)]

    # to find the direction
    dir_down = False
    row, col = 0, 0

    for i in range(len(text)):


        if (row == 0) or (row == key - 1):
            dir_down = not dir_down


        rail[row][col] = text[i]
        col += 1


        if dir_down:
            row += 1
        else:
            row -= 1

    result = []
    for i in range(key):
        for j in range(len(text)):
            if rail[i][j] != '\n':
                result.append(rail[i][j])
    return("" . join(result))


## ARITHMETIC ENCODING-DECODING (COMPRESSION)

In [33]:
import decimal
from decimal import Decimal


class ArithmeticEncoding:
    """Arithmetic coder that maps a message to a single ``Decimal`` value.

    The coder repeatedly splits the current interval (starting at [0, 1])
    into one sub-interval per symbol, sized by that symbol's probability,
    and narrows to the sub-interval of the next message symbol. All interval
    arithmetic uses ``Decimal`` and therefore the precision of the active
    ``decimal`` context.
    """

    def __init__(self, frequency_table, save_stages=False):
        """Build the probability table from *frequency_table*.

        Args:
            frequency_table: Mapping of symbol -> frequency (weight).
            save_stages: When truthy, ``encode``/``decode`` also return the
                interval table of every stage (one dict per symbol).
        """
        self.save_stages = save_stages
        if save_stages:
            print("WARNING: Setting save_stages=True may cause memory overflow if the message is large.")

        self.probability_table = self.get_probability_table(frequency_table)

    def get_probability_table(self, frequency_table):
        """Return a dict mapping each symbol to frequency / total frequency."""
        total_frequency = sum(frequency_table.values())

        return {
            symbol: frequency / total_frequency
            for symbol, frequency in frequency_table.items()
        }

    def get_encoded_value(self, last_stage_probs):
        """Pick the value that represents the message.

        Args:
            last_stage_probs: Stage table (symbol -> ``[low, high]``) produced
                after the final message symbol.

        Returns:
            ``(low, high, value)`` where ``low``/``high`` are the smallest and
            largest bounds found in the table and ``value`` is their midpoint.
        """
        bounds = [
            bound
            for interval in last_stage_probs.values()
            for bound in interval
        ]

        last_stage_min = min(bounds)
        last_stage_max = max(bounds)
        encoded_value = (last_stage_min + last_stage_max) / 2

        return last_stage_min, last_stage_max, encoded_value

    def process_stage(self, probability_table, stage_min, stage_max):
        """Split ``[stage_min, stage_max]`` into one sub-interval per symbol.

        Sub-intervals are laid out back to back in the iteration order of
        *probability_table*; each has width ``probability * (stage_max -
        stage_min)``.

        Returns:
            Dict mapping each symbol to its ``[low, high]`` bounds.
        """
        stage_probs = {}
        stage_domain = stage_max - stage_min
        # Iterate the mapping directly instead of rebuilding the key list on
        # every step (which made each stage quadratic in the alphabet size).
        for term, probability in probability_table.items():
            cum_prob = Decimal(probability) * stage_domain + stage_min
            stage_probs[term] = [stage_min, cum_prob]
            # The next symbol's interval starts where this one ends.
            stage_min = cum_prob
        return stage_probs

    def encode(self, msg, probability_table):
        """Encode *msg* into a single ``Decimal``.

        Args:
            msg: Iterable of symbols; every symbol must be a key of
                *probability_table*.
            probability_table: Mapping of symbol -> probability.

        Returns:
            ``(encoded_msg, encoder, interval_min, interval_max)``.
            ``encoder`` is the list of stage tables when ``save_stages`` is
            set and an empty list otherwise.
        """
        encoder = []

        stage_min = Decimal(0)
        stage_max = Decimal(1)

        for msg_term in msg:
            stage_probs = self.process_stage(probability_table, stage_min, stage_max)

            # Narrow the working interval to the current symbol's slice.
            stage_min, stage_max = stage_probs[msg_term]

            if self.save_stages:
                encoder.append(stage_probs)

        # One more split of the final interval; its overall bounds delimit
        # the range of values that represent the message.
        last_stage_probs = self.process_stage(probability_table, stage_min, stage_max)

        if self.save_stages:
            encoder.append(last_stage_probs)

        interval_min_value, interval_max_value, encoded_msg = self.get_encoded_value(last_stage_probs)

        return encoded_msg, encoder, interval_min_value, interval_max_value

    def decode(self, encoded_msg, msg_length, probability_table):
        """Recover *msg_length* symbols from *encoded_msg*.

        Args:
            encoded_msg: Value produced by ``encode``.
            msg_length: Number of symbols to decode.
            probability_table: The same mapping that was used for encoding.

        Returns:
            ``(decoded_msg, decoder)`` where ``decoded_msg`` is the list of
            decoded symbols and ``decoder`` is the list of stage tables when
            ``save_stages`` is set (empty list otherwise).
        """
        decoder = []
        decoded_msg = []

        stage_min = Decimal(0)
        stage_max = Decimal(1)

        for _ in range(msg_length):
            stage_probs = self.process_stage(probability_table, stage_min, stage_max)

            # First symbol whose interval contains the value. If none does,
            # the loop variable keeps the last symbol of the table, which is
            # then used as the decoded symbol.
            for msg_term, (low, high) in stage_probs.items():
                if low <= encoded_msg <= high:
                    break

            decoded_msg.append(msg_term)

            stage_min, stage_max = stage_probs[msg_term]

            if self.save_stages:
                decoder.append(stage_probs)

        if self.save_stages:
            last_stage_probs = self.process_stage(probability_table, stage_min, stage_max)
            decoder.append(last_stage_probs)

        return decoded_msg, decoder


## FUNCTION CALL

In [34]:
# Number of rails used by the rail-fence cipher (the cipher key).
rail = 5
# Transpose the dataset text with the rail-fence cipher before compression.
encr = encryptRailFence(data, rail)

In [35]:
# Display the rail-fence encrypted text.
encr

'acbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacbacb

In [36]:
from collections import Counter
# Count how many times each character occurs in the encrypted text.
res = Counter(encr)

In [37]:
# Plain-dict copy of the character counts (the former `freq = {}` was a dead
# assignment that was overwritten immediately).
freq = dict(res)

In [38]:
import random
# Give every symbol a random weight in 1..10 instead of its real count.
# A new dict is built so `freq` keeps the true counts; the earlier
# `new_freq = freq` only aliased the dict and the loop overwrote both.
# NOTE(review): random weights do not follow the data's actual symbol
# distribution, and they change on every run — confirm this is intended.
new_freq = {k: random.randint(1, 10) for k in freq}

In [39]:
# Display the weight table that is passed to the arithmetic coder.
new_freq

{'a': 5, 'c': 10, 'b': 6}

In [40]:
%%time
from decimal import getcontext

# Number of significant decimal digits used by every Decimal operation below.
# NOTE(review): the precision is hard-coded while the weight table is random;
# a more skewed table may need more digits than this for the message to
# decode losslessly — verify against the table actually drawn.
getcontext().prec = 38000

AE = ArithmeticEncoding(frequency_table=new_freq,
                        save_stages=False)

# NOTE(review): hard-coded literal rather than the text loaded from the
# dataset file — confirm the two are meant to be identical.
original_msg = 'abc'*20000
encrypted_msg = encr

# Save the plain message; the context manager closes the file even if the
# write raises.
with open("original.txt", "w") as text_file:
    text_file.write(original_msg)

# Compress the rail-fence encrypted text into a single Decimal value.
encoded_msg, encoder, interval_min_value, interval_max_value = AE.encode(msg=encrypted_msg,
                                                                         probability_table=AE.probability_table)

print(f"Encoded Message: {encoded_msg}")

# Save the encoded value as decimal text; this is the file that gets sent.
with open("compressed.txt", "w") as comp_file:
    comp_file.write(str(encoded_msg))


Encoded Message: 0.142283227318379623892950090664141732197379785948140237893667929537962001752167267022393829813259233775303763328867196177675620279929717518481057407940045827574440183228198811935778101324883618948140592437892447850026695909855831237444553111887521666012529179882342999864383629838569932219303890178993403555222886143153538441578844605855979737683289266980590642307309545182607750076817931820889908317584796386972774128426960090922429368352026802318381723020631723806056032272897487391896638185842494549423724212925610540393567670854458329940208262573532309201512774777100969202664967000637445901314051417773725509983879427098916321235706570764267373562417378124187337717165625125065796752940216225119104639520908127376866987616240152586942364316682365409467411662053131234361612079223036875484499887264947455718804524665251929153560563651389822997121317626050576260749355686116626748150208928769522602805624844362188117049564352538218933694248673809637190319701088483877253442384317

## FILE SENDING

In [14]:
import socket
import tqdm
import os
import pickle

SEPARATOR = "<SEPARATOR>"
BUFFER_SIZE = 4096

# Receiver address, shared by all four connections below.
host = "SSF"
port = 5555

# Raw string so the Windows backslashes are taken literally; in a normal
# string "\p", "\A" and "\c" are invalid escape sequences.
filename = r"D:\python_prog\ArithmeticEncodingPython-main\compressed.txt"
# Looked up before connecting so a missing file fails without opening a socket.
filesize = os.path.getsize(filename)

# 1) Send the compressed file: a "<name><SEPARATOR><size>" header followed by
#    the raw file bytes. `with` closes the socket even if sending fails.
with socket.socket() as s:
    print(f"[+] Connecting to {host}:{port}")
    s.connect((host, port))
    print("[+] Connected to ", host)
    # sendall() keeps sending until the whole buffer is out; send() may stop
    # after a partial write.
    # NOTE(review): the header has no terminator or fixed length, so the
    # receiver may read it together with the first file bytes — verify how
    # the receiver splits the stream.
    s.sendall(f"{filename}{SEPARATOR}{filesize}".encode())

    with tqdm.tqdm(range(filesize), f"Sending {filename}", unit="B", unit_scale=True,
                   unit_divisor=1024, mininterval=0, miniters=0) as progress, \
            open(filename, "rb") as f:
        while True:
            bytes_read = f.read(BUFFER_SIZE)
            if not bytes_read:
                break
            s.sendall(bytes_read)
            progress.update(len(bytes_read))

# 2) Send the message length on a fresh connection.
lenx = len(original_msg)
with socket.socket() as slen:
    slen.connect((host, port))
    print(f"[+] Sending mlen to {host}:{port}")
    slen.sendall(str(lenx).encode())
    print(f"[+] mlen sent to {host}:{port}")

# 3) Send the rail-fence key (number of rails).
key = rail
with socket.socket() as snew:
    print(f"[+] Sending key to {host}:{port}")
    snew.connect((host, port))
    snew.sendall(str(key).encode())
    print(f"[+] key sent to {host}:{port}")

# 4) Send the weight table. Stored under its own name so the dataset held in
#    `data` is no longer overwritten with pickle bytes.
# NOTE(review): unpickling bytes received from the network can execute
# arbitrary code on the receiver; consider a data-only format such as JSON.
freq_payload = pickle.dumps(new_freq)
with socket.socket() as slas:
    print(f"[+] Sending dict to {host}:{port}")
    slas.connect((host, port))
    slas.sendall(freq_payload)
    print(f"[+] dict sent to {host}:{port}")

[+] Connecting to SSF:5555
[+] Connected to  SSF


Sending D:\python_prog\ArithmeticEncodingPython-main\compressed.txt: 100%|██████████| 37.1k/37.1k [00:00<00:00, 2.75MB/s]

[+] Sending mlen to SSF:5555
[+] mlen sent to SSF:5555
[+] Sending key to SSF:5555
[+] key sent to SSF:5555
[+] Sending dict to SSF:5555
[+] dict sent to SSF:5555
